/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

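/* helpers for packing fields into the GB_TILE_MODE0 / GB_MACROTILE_MODE0
 * tiling descriptors used by the tile-mode tables later in this file
 */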
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

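/*
 * Per-VMID GDS partition registers: the GDS base/size pair plus the GWS
 * and OA registers for each of the 16 VMIDs, so a VM's partition can be
 * programmed by indexing this table.
 */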
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

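/*
 * The "golden" register tables below are consumed three dwords at a time
 * by amdgpu_program_register_sequence(): {register offset, AND mask, OR
 * value}.  The masked bits are cleared and the OR value written back; an
 * AND mask of 0xffffffff overwrites the register outright.
 */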
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static int gfx_v8_0_compute_mqd_soft_init(struct amdgpu_device *adev);
static void gfx_v8_0_compute_mqd_soft_fini(struct amdgpu_device *adev);

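/*
 * Apply the per-ASIC golden register settings: clock-gating init values,
 * chip-specific tweaks and the common config block, plus a board-specific
 * I2C fixup for a few Polaris10 designs.
 */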
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

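/*
 * Ring sanity test: write a token to a scratch register through a
 * SET_UCONFIG_REG packet on the ring, then poll until the CP has
 * written it back.
 */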
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

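/*
 * IB sanity test: submit the same scratch-register write as an indirect
 * buffer and wait (with timeout) on the resulting fence before checking
 * the scratch value.
 */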
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

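/*
 * Request and validate the PFP, ME, CE, RLC, MEC and (where present)
 * MEC2 microcode images for the detected ASIC, cache their version and
 * feature fields, and register the images for SMU-assisted loading when
 * that path is enabled.
 */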
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);

	/*
	 * Chained IB support in the CP ucode hasn't been formally released
	 * yet, so keep it disabled for now.
	 * TODO: once the ucode is ready, use the ucode version to decide
	 * whether chained IBs are supported.
	 */
	adev->virt.chained_ib_support = false;

	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.smu_load) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the JT (CP jump table) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

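/*
 * Build the clear state buffer (CSB): preamble begin, a context-control
 * packet, the SECT_CONTEXT register extents from the ASIC's cs_data, the
 * current raster config, preamble end, and a final CLEAR_STATE packet.
 */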
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

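/*
 * Copy the CP jump tables (JT) out of the CE, PFP, ME, MEC and, on
 * Carrizo, MEC2 microcode images into the RLC-owned cp_table buffer,
 * one table after another.
 */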
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}

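/*
 * Allocate and initialize the RLC buffer objects: a VRAM buffer for the
 * clear state block (filled via gfx_v8_0_get_csb_buffer) and, on
 * Carrizo/Stoney, an additional buffer holding the CP jump tables plus
 * GDS backup space.
 */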
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}

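/*
 * KIQ (Kernel Interface Queue) setup.  The KIQ is a compute queue the
 * driver owns for submitting CP commands such as register reads on
 * SR-IOV VFs (hence the reg_val_offs writeback slot); it lives on MEC2
 * when a second MEC firmware is present, otherwise on MEC1 pipe 1.
 */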
4e638ae9
XY
1376static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
1377 struct amdgpu_ring *ring,
1378 struct amdgpu_irq_src *irq)
1379{
1380 int r = 0;
1381
bffa2280
ML
1382 r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
1383 if (r)
1384 return r;
880e87e3 1385
4e638ae9
XY
1386 ring->adev = NULL;
1387 ring->ring_obj = NULL;
1388 ring->use_doorbell = true;
1389 ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
1390 if (adev->gfx.mec2_fw) {
1391 ring->me = 2;
1392 ring->pipe = 0;
1393 } else {
1394 ring->me = 1;
1395 ring->pipe = 1;
1396 }
1397
1398 ring->queue = 0;
1399 sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
1400 r = amdgpu_ring_init(adev, ring, 1024,
1401 irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
1402 if (r)
1403 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
1404
1405 return r;
1406}
1407static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
1408 struct amdgpu_irq_src *irq)
1409{
1410	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
1411	amdgpu_ring_fini(ring);
1412}
1413
1414#define MEC_HPD_SIZE 2048
1415
1416static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1417{
1418 int r;
1419 u32 *hpd;
1420
1421 /*
1422 * we assign only 1 pipe because all other pipes will
1423 * be handled by KFD
1424 */
1425 adev->gfx.mec.num_mec = 1;
1426 adev->gfx.mec.num_pipe = 1;
1427 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1428
1429 if (adev->gfx.mec.hpd_eop_obj == NULL) {
1430 r = amdgpu_bo_create(adev,
1431				      adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1432				      PAGE_SIZE, true,
1433				      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1434 &adev->gfx.mec.hpd_eop_obj);
1435 if (r) {
1436			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1437 return r;
1438 }
1439 }
1440
1441 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1442 if (unlikely(r != 0)) {
1443 gfx_v8_0_mec_fini(adev);
1444 return r;
1445 }
1446 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1447 &adev->gfx.mec.hpd_eop_gpu_addr);
1448 if (r) {
1449		dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
1450 gfx_v8_0_mec_fini(adev);
1451 return r;
1452 }
1453 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1454 if (r) {
1455		dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
1456 gfx_v8_0_mec_fini(adev);
1457 return r;
1458 }
1459
1460	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1461
1462 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1463 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1464
1465 return 0;
1466}
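/*
 * Worked sizing for gfx_v8_0_mec_init() above: with num_mec = 1 and
 * num_pipe = 1, num_queue = 1 * 1 * 8 = 8 queues, so the HPD EOP buffer
 * is 8 * MEC_HPD_SIZE = 8 * 2048 = 16384 bytes, i.e. four pages at a
 * typical 4 KiB PAGE_SIZE alignment.
 */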
1467
1468static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
1469{
1470 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1471
1472 amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
1473}
1474
1475static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
1476{
1477 int r;
1478 u32 *hpd;
1479 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1480
1481 r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
1482 AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
1483 &kiq->eop_gpu_addr, (void **)&hpd);
1484 if (r) {
1485 dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
1486 return r;
1487 }
1488
1489 memset(hpd, 0, MEC_HPD_SIZE);
1490
1491 amdgpu_bo_kunmap(kiq->eop_obj);
1492
1493 return 0;
1494}
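/*
 * Note the contrast with gfx_v8_0_mec_init() above: amdgpu_bo_create_kernel()
 * wraps the create/reserve/pin/kmap sequence in one call, which is why
 * gfx_v8_0_kiq_init() needs only a single error path and a matching
 * amdgpu_bo_free_kernel() in gfx_v8_0_kiq_fini().
 */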
1495
1496static const u32 vgpr_init_compute_shader[] =
1497{
1498 0x7e000209, 0x7e020208,
1499 0x7e040207, 0x7e060206,
1500 0x7e080205, 0x7e0a0204,
1501 0x7e0c0203, 0x7e0e0202,
1502 0x7e100201, 0x7e120200,
1503 0x7e140209, 0x7e160208,
1504 0x7e180207, 0x7e1a0206,
1505 0x7e1c0205, 0x7e1e0204,
1506 0x7e200203, 0x7e220202,
1507 0x7e240201, 0x7e260200,
1508 0x7e280209, 0x7e2a0208,
1509 0x7e2c0207, 0x7e2e0206,
1510 0x7e300205, 0x7e320204,
1511 0x7e340203, 0x7e360202,
1512 0x7e380201, 0x7e3a0200,
1513 0x7e3c0209, 0x7e3e0208,
1514 0x7e400207, 0x7e420206,
1515 0x7e440205, 0x7e460204,
1516 0x7e480203, 0x7e4a0202,
1517 0x7e4c0201, 0x7e4e0200,
1518 0x7e500209, 0x7e520208,
1519 0x7e540207, 0x7e560206,
1520 0x7e580205, 0x7e5a0204,
1521 0x7e5c0203, 0x7e5e0202,
1522 0x7e600201, 0x7e620200,
1523 0x7e640209, 0x7e660208,
1524 0x7e680207, 0x7e6a0206,
1525 0x7e6c0205, 0x7e6e0204,
1526 0x7e700203, 0x7e720202,
1527 0x7e740201, 0x7e760200,
1528 0x7e780209, 0x7e7a0208,
1529 0x7e7c0207, 0x7e7e0206,
1530 0xbf8a0000, 0xbf810000,
1531};
1532
1533static const u32 sgpr_init_compute_shader[] =
1534{
1535 0xbe8a0100, 0xbe8c0102,
1536 0xbe8e0104, 0xbe900106,
1537 0xbe920108, 0xbe940100,
1538 0xbe960102, 0xbe980104,
1539 0xbe9a0106, 0xbe9c0108,
1540 0xbe9e0100, 0xbea00102,
1541 0xbea20104, 0xbea40106,
1542 0xbea60108, 0xbea80100,
1543 0xbeaa0102, 0xbeac0104,
1544 0xbeae0106, 0xbeb00108,
1545 0xbeb20100, 0xbeb40102,
1546 0xbeb60104, 0xbeb80106,
1547 0xbeba0108, 0xbebc0100,
1548 0xbebe0102, 0xbec00104,
1549 0xbec20106, 0xbec40108,
1550 0xbec60100, 0xbec80102,
1551 0xbee60004, 0xbee70005,
1552 0xbeea0006, 0xbeeb0007,
1553 0xbee80008, 0xbee90009,
1554 0xbefc0000, 0xbf8a0000,
1555 0xbf810000, 0x00000000,
1556};
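/*
 * The two blobs above are raw GCN (gfx8) machine words rather than a
 * structured table. Decoding the trailing entries against the public GCN3
 * ISA, 0xbf8a0000 and 0xbf810000 are s_barrier and s_endpgm, so each shader
 * synchronizes and exits; the earlier words initialize the VGPR/SGPR files
 * that the EDC workaround below needs to touch.
 */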
1557
1558static const u32 vgpr_init_regs[] =
1559{
1560 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1561 mmCOMPUTE_RESOURCE_LIMITS, 0,
1562 mmCOMPUTE_NUM_THREAD_X, 256*4,
1563 mmCOMPUTE_NUM_THREAD_Y, 1,
1564 mmCOMPUTE_NUM_THREAD_Z, 1,
1565 mmCOMPUTE_PGM_RSRC2, 20,
1566 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1567 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1568 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1569 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1570 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1571 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1572 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1573 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1574 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1575 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1576};
1577
1578static const u32 sgpr1_init_regs[] =
1579{
1580 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1581 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1582 mmCOMPUTE_NUM_THREAD_X, 256*5,
1583 mmCOMPUTE_NUM_THREAD_Y, 1,
1584 mmCOMPUTE_NUM_THREAD_Z, 1,
1585 mmCOMPUTE_PGM_RSRC2, 20,
1586 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1587 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1588 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1589 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1590 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1591 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1592 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1593 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1594 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1595 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1596};
1597
1598static const u32 sgpr2_init_regs[] =
1599{
1600 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1601 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1602 mmCOMPUTE_NUM_THREAD_X, 256*5,
1603 mmCOMPUTE_NUM_THREAD_Y, 1,
1604 mmCOMPUTE_NUM_THREAD_Z, 1,
1605 mmCOMPUTE_PGM_RSRC2, 20,
1606 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1607 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1608 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1609 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1610 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1611 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1612 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1613 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1614 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1615 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1616};
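/*
 * Each *_init_regs table above is a flat list of (register, value) pairs;
 * the loops in gfx_v8_0_do_edc_gpr_workarounds() below consume them two
 * entries at a time, emitting one PACKET3_SET_SH_REG write per pair. The
 * differing COMPUTE_STATIC_THREAD_MGMT_SE0 values (0xffffffff, 0x0f, 0xf0)
 * read as CU-enable masks: the VGPR pass runs on every CU, while the two
 * SGPR passes split the shader engine between its lower and upper CUs.
 */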
1617
1618static const u32 sec_ded_counter_registers[] =
1619{
1620 mmCPC_EDC_ATC_CNT,
1621 mmCPC_EDC_SCRATCH_CNT,
1622 mmCPC_EDC_UCODE_CNT,
1623 mmCPF_EDC_ATC_CNT,
1624 mmCPF_EDC_ROQ_CNT,
1625 mmCPF_EDC_TAG_CNT,
1626 mmCPG_EDC_ATC_CNT,
1627 mmCPG_EDC_DMA_CNT,
1628 mmCPG_EDC_TAG_CNT,
1629 mmDC_EDC_CSINVOC_CNT,
1630 mmDC_EDC_RESTORE_CNT,
1631 mmDC_EDC_STATE_CNT,
1632 mmGDS_EDC_CNT,
1633 mmGDS_EDC_GRBM_CNT,
1634 mmGDS_EDC_OA_DED,
1635 mmSPI_EDC_CNT,
1636 mmSQC_ATC_EDC_GATCL1_CNT,
1637 mmSQC_EDC_CNT,
1638 mmSQ_EDC_DED_CNT,
1639 mmSQ_EDC_INFO,
1640 mmSQ_EDC_SEC_CNT,
1641 mmTCC_EDC_CNT,
1642 mmTCP_ATC_EDC_GATCL1_CNT,
1643 mmTCP_EDC_CNT,
1644 mmTD_EDC_CNT
1645};
1646
1647static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1648{
1649 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1650 struct amdgpu_ib ib;
1651	struct dma_fence *f = NULL;
1652 int r, i;
1653 u32 tmp;
1654 unsigned total_size, vgpr_offset, sgpr_offset;
1655 u64 gpu_addr;
1656
1657 /* only supported on CZ */
1658 if (adev->asic_type != CHIP_CARRIZO)
1659 return 0;
1660
1661 /* bail if the compute ring is not ready */
1662 if (!ring->ready)
1663 return 0;
1664
1665 tmp = RREG32(mmGB_EDC_MODE);
1666 WREG32(mmGB_EDC_MODE, 0);
1667
1668 total_size =
1669 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1670 total_size +=
1671 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1672 total_size +=
1673 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1674 total_size = ALIGN(total_size, 256);
1675 vgpr_offset = total_size;
1676 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1677 sgpr_offset = total_size;
1678 total_size += sizeof(sgpr_init_compute_shader);
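	/*
	 * Accounting for the size formula above, per dispatch block: each of
	 * the 16 (register, value) pairs costs 3 dwords (SET_SH_REG header,
	 * register offset, value), the COMPUTE_PGM_LO/HI write costs 4, the
	 * DISPATCH_DIRECT packet costs 5 and the CS-partial-flush EVENT_WRITE
	 * costs 2, giving (16 * 3 + 4 + 5 + 2) * 4 = 236 bytes before the
	 * 256-byte alignment of the shader blobs.
	 */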
1679
1680 /* allocate an indirect buffer to put the commands in */
1681 memset(&ib, 0, sizeof(ib));
1682	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1683 if (r) {
1684 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1685 return r;
1686 }
1687
1688 /* load the compute shaders */
1689 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1690 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1691
1692 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1693 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1694
1695 /* init the ib length to 0 */
1696 ib.length_dw = 0;
1697
1698 /* VGPR */
1699 /* write the register state for the compute dispatch */
1700 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1701 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1702 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1703 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1704 }
1705 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1706 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1707 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1708 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1709 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1710 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1711
1712 /* write dispatch packet */
1713 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1714 ib.ptr[ib.length_dw++] = 8; /* x */
1715 ib.ptr[ib.length_dw++] = 1; /* y */
1716 ib.ptr[ib.length_dw++] = 1; /* z */
1717 ib.ptr[ib.length_dw++] =
1718 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1719
1720 /* write CS partial flush packet */
1721 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1722 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1723
1724 /* SGPR1 */
1725 /* write the register state for the compute dispatch */
1726 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1727 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1728 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1729 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1730 }
1731 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1732 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1733 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1734 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1735 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1736 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1737
1738 /* write dispatch packet */
1739 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1740 ib.ptr[ib.length_dw++] = 8; /* x */
1741 ib.ptr[ib.length_dw++] = 1; /* y */
1742 ib.ptr[ib.length_dw++] = 1; /* z */
1743 ib.ptr[ib.length_dw++] =
1744 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1745
1746 /* write CS partial flush packet */
1747 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1748 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1749
1750 /* SGPR2 */
1751 /* write the register state for the compute dispatch */
1752 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1753 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1754 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1755 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1756 }
1757 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1758 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1759 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1760 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1761 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1762 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1763
1764 /* write dispatch packet */
1765 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1766 ib.ptr[ib.length_dw++] = 8; /* x */
1767 ib.ptr[ib.length_dw++] = 1; /* y */
1768 ib.ptr[ib.length_dw++] = 1; /* z */
1769 ib.ptr[ib.length_dw++] =
1770 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1771
1772 /* write CS partial flush packet */
1773 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1774 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1775
1776	/* schedule the ib on the ring */
1777	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1778 if (r) {
1779 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1780 goto fail;
1781 }
1782
1783 /* wait for the GPU to finish processing the IB */
1784	r = dma_fence_wait(f, false);
1785 if (r) {
1786 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1787 goto fail;
1788 }
1789
1790 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1791 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1792 WREG32(mmGB_EDC_MODE, tmp);
1793
1794 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1795 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1796 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1797
1798
1799 /* read back registers to clear the counters */
1800 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1801 RREG32(sec_ded_counter_registers[i]);
1802
1803fail:
1804	amdgpu_ib_free(adev, &ib, NULL);
1805	dma_fence_put(f);
1806
1807 return r;
1808}
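/*
 * The three dispatch blocks above repeat the same SET_SH_REG emission
 * pattern verbatim. A minimal sketch of how that pattern could be factored
 * out -- not a helper that exists in this driver, shown for illustration
 * only, hence the #if 0:
 */
#if 0
static void gfx_v8_0_emit_sh_reg_pairs(struct amdgpu_ib *ib,
				       const u32 *regs, unsigned count)
{
	unsigned i;

	/* consume the table two entries at a time: (register, value) */
	for (i = 0; i < count; i += 2) {
		ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib->ptr[ib->length_dw++] = regs[i] - PACKET3_SET_SH_REG_START;
		ib->ptr[ib->length_dw++] = regs[i + 1];
	}
}

/* usage: gfx_v8_0_emit_sh_reg_pairs(&ib, vgpr_init_regs, ARRAY_SIZE(vgpr_init_regs)); */
#endif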
1809
1810static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1811{
1812 u32 gb_addr_config;
1813 u32 mc_shared_chmap, mc_arb_ramcfg;
1814 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1815 u32 tmp;
1816	int ret;
1817
1818 switch (adev->asic_type) {
1819 case CHIP_TOPAZ:
1820 adev->gfx.config.max_shader_engines = 1;
1821 adev->gfx.config.max_tile_pipes = 2;
1822 adev->gfx.config.max_cu_per_sh = 6;
1823 adev->gfx.config.max_sh_per_se = 1;
1824 adev->gfx.config.max_backends_per_se = 2;
1825 adev->gfx.config.max_texture_channel_caches = 2;
1826 adev->gfx.config.max_gprs = 256;
1827 adev->gfx.config.max_gs_threads = 32;
1828 adev->gfx.config.max_hw_contexts = 8;
1829
1830 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1831 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1832 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1833 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1834 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1835 break;
1836 case CHIP_FIJI:
1837 adev->gfx.config.max_shader_engines = 4;
1838 adev->gfx.config.max_tile_pipes = 16;
1839 adev->gfx.config.max_cu_per_sh = 16;
1840 adev->gfx.config.max_sh_per_se = 1;
1841 adev->gfx.config.max_backends_per_se = 4;
1842		adev->gfx.config.max_texture_channel_caches = 16;
1843 adev->gfx.config.max_gprs = 256;
1844 adev->gfx.config.max_gs_threads = 32;
1845 adev->gfx.config.max_hw_contexts = 8;
1846
1847 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1848 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1849 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1850 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1851 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1852 break;
1853	case CHIP_POLARIS11:
1854	case CHIP_POLARIS12:
1855 ret = amdgpu_atombios_get_gfx_info(adev);
1856 if (ret)
1857 return ret;
1858 adev->gfx.config.max_gprs = 256;
1859 adev->gfx.config.max_gs_threads = 32;
1860 adev->gfx.config.max_hw_contexts = 8;
1861
1862 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1863 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1864 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1865 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1866		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1867		break;
1868	case CHIP_POLARIS10:
1869 ret = amdgpu_atombios_get_gfx_info(adev);
1870 if (ret)
1871 return ret;
1872 adev->gfx.config.max_gprs = 256;
1873 adev->gfx.config.max_gs_threads = 32;
1874 adev->gfx.config.max_hw_contexts = 8;
1875
1876 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1877 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1878 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1879 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1880 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1881 break;
1882 case CHIP_TONGA:
1883 adev->gfx.config.max_shader_engines = 4;
1884 adev->gfx.config.max_tile_pipes = 8;
1885 adev->gfx.config.max_cu_per_sh = 8;
1886 adev->gfx.config.max_sh_per_se = 1;
1887 adev->gfx.config.max_backends_per_se = 2;
1888 adev->gfx.config.max_texture_channel_caches = 8;
1889 adev->gfx.config.max_gprs = 256;
1890 adev->gfx.config.max_gs_threads = 32;
1891 adev->gfx.config.max_hw_contexts = 8;
1892
1893 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1894 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1895 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1896 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1897 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1898 break;
1899 case CHIP_CARRIZO:
1900 adev->gfx.config.max_shader_engines = 1;
1901 adev->gfx.config.max_tile_pipes = 2;
1902 adev->gfx.config.max_sh_per_se = 1;
1903 adev->gfx.config.max_backends_per_se = 2;
1904
1905 switch (adev->pdev->revision) {
1906 case 0xc4:
1907 case 0x84:
1908 case 0xc8:
1909 case 0xcc:
1910 case 0xe1:
1911 case 0xe3:
1912 /* B10 */
1913 adev->gfx.config.max_cu_per_sh = 8;
1914 break;
1915 case 0xc5:
1916 case 0x81:
1917 case 0x85:
1918 case 0xc9:
1919 case 0xcd:
1920 case 0xe2:
1921 case 0xe4:
1922 /* B8 */
1923 adev->gfx.config.max_cu_per_sh = 6;
1924 break;
1925 case 0xc6:
1926 case 0xca:
1927 case 0xce:
1928		case 0x88:
1929 /* B6 */
1930 adev->gfx.config.max_cu_per_sh = 6;
1931 break;
1932 case 0xc7:
1933 case 0x87:
1934 case 0xcb:
1935 case 0xe5:
1936 case 0x89:
1937 default:
1938 /* B4 */
1939 adev->gfx.config.max_cu_per_sh = 4;
1940 break;
1941 }
1942
1943 adev->gfx.config.max_texture_channel_caches = 2;
1944 adev->gfx.config.max_gprs = 256;
1945 adev->gfx.config.max_gs_threads = 32;
1946 adev->gfx.config.max_hw_contexts = 8;
1947
1948 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1949 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1950 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1951 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1952 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1953 break;
1954 case CHIP_STONEY:
1955 adev->gfx.config.max_shader_engines = 1;
1956 adev->gfx.config.max_tile_pipes = 2;
1957 adev->gfx.config.max_sh_per_se = 1;
1958 adev->gfx.config.max_backends_per_se = 1;
1959
1960 switch (adev->pdev->revision) {
1961 case 0xc0:
1962 case 0xc1:
1963 case 0xc2:
1964 case 0xc4:
1965 case 0xc8:
1966 case 0xc9:
1967 adev->gfx.config.max_cu_per_sh = 3;
1968 break;
1969 case 0xd0:
1970 case 0xd1:
1971 case 0xd2:
1972 default:
1973 adev->gfx.config.max_cu_per_sh = 2;
1974 break;
1975 }
1976
1977 adev->gfx.config.max_texture_channel_caches = 2;
1978 adev->gfx.config.max_gprs = 256;
1979 adev->gfx.config.max_gs_threads = 16;
1980 adev->gfx.config.max_hw_contexts = 8;
1981
1982 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1983 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1984 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1985 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1986 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1987 break;
1988 default:
1989 adev->gfx.config.max_shader_engines = 2;
1990 adev->gfx.config.max_tile_pipes = 4;
1991 adev->gfx.config.max_cu_per_sh = 2;
1992 adev->gfx.config.max_sh_per_se = 1;
1993 adev->gfx.config.max_backends_per_se = 2;
1994 adev->gfx.config.max_texture_channel_caches = 4;
1995 adev->gfx.config.max_gprs = 256;
1996 adev->gfx.config.max_gs_threads = 32;
1997 adev->gfx.config.max_hw_contexts = 8;
1998
1999 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2000 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2001 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2002 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2003 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
2004 break;
2005 }
2006
2007 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2008 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2009 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2010
2011 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2012 adev->gfx.config.mem_max_burst_length_bytes = 256;
2013 if (adev->flags & AMD_IS_APU) {
2014 /* Get memory bank mapping mode. */
2015 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2016 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2017 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2018
2019 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2020 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2021 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2022
2023		/* Validate settings in case only one DIMM is installed. */
2024 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2025 dimm00_addr_map = 0;
2026 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2027 dimm01_addr_map = 0;
2028 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2029 dimm10_addr_map = 0;
2030 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2031 dimm11_addr_map = 0;
2032
2033		/* If the DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
2034		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
2035 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2036 adev->gfx.config.mem_row_size_in_kb = 2;
2037 else
2038 adev->gfx.config.mem_row_size_in_kb = 1;
2039 } else {
2040 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2041 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2042 if (adev->gfx.config.mem_row_size_in_kb > 4)
2043 adev->gfx.config.mem_row_size_in_kb = 4;
2044 }
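	/*
	 * The dGPU branch above derives row size from the fused column count:
	 * the formula reads as 2^(8 + NOOFCOLS) columns at 4 bytes each, so
	 * NOOFCOLS = 0 gives 4 * 256 / 1024 = 1 KiB rows and NOOFCOLS = 2
	 * gives 4 KiB, the value the clamp caps at. The APU branch instead
	 * infers row size from the DIMM address-map fuses, since system
	 * memory is shared there.
	 */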
2045
2046 adev->gfx.config.shader_engine_tile_size = 32;
2047 adev->gfx.config.num_gpus = 1;
2048 adev->gfx.config.multi_gpu_tile_size = 64;
2049
2050 /* fix up row size */
2051 switch (adev->gfx.config.mem_row_size_in_kb) {
2052 case 1:
2053 default:
2054 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2055 break;
2056 case 2:
2057 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2058 break;
2059 case 4:
2060 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2061 break;
2062 }
2063 adev->gfx.config.gb_addr_config = gb_addr_config;
2064
2065 return 0;
2066}
2067
2068static int gfx_v8_0_sw_init(void *handle)
2069{
2070 int i, r;
2071 struct amdgpu_ring *ring;
2072	struct amdgpu_kiq *kiq;
2073	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2074
2075	/* KIQ event */
2076	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
2077 if (r)
2078 return r;
2079
2080	/* EOP Event */
2081	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
2082 if (r)
2083 return r;
2084
2085 /* Privileged reg */
2086 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
2087 &adev->gfx.priv_reg_irq);
2088 if (r)
2089 return r;
2090
2091 /* Privileged inst */
2092 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
2093 &adev->gfx.priv_inst_irq);
2094 if (r)
2095 return r;
2096
2097 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2098
2099 gfx_v8_0_scratch_init(adev);
2100
2101 r = gfx_v8_0_init_microcode(adev);
2102 if (r) {
2103 DRM_ERROR("Failed to load gfx firmware!\n");
2104 return r;
2105 }
2106
2107 r = gfx_v8_0_rlc_init(adev);
2108 if (r) {
2109 DRM_ERROR("Failed to init rlc BOs!\n");
2110 return r;
2111 }
2112
2113 r = gfx_v8_0_mec_init(adev);
2114 if (r) {
2115 DRM_ERROR("Failed to init MEC BOs!\n");
2116 return r;
2117 }
2118
2119 /* set up the gfx ring */
2120 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2121 ring = &adev->gfx.gfx_ring[i];
2122 ring->ring_obj = NULL;
2123 sprintf(ring->name, "gfx");
2124 /* no gfx doorbells on iceland */
2125 if (adev->asic_type != CHIP_TOPAZ) {
2126 ring->use_doorbell = true;
2127 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2128 }
2129
2130 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2131 AMDGPU_CP_IRQ_GFX_EOP);
2132 if (r)
2133 return r;
2134 }
2135
2136 /* set up the compute queues */
2137 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2138 unsigned irq_type;
2139
2140 /* max 32 queues per MEC */
2141 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2142 DRM_ERROR("Too many (%d) compute rings!\n", i);
2143 break;
2144 }
2145 ring = &adev->gfx.compute_ring[i];
2146 ring->ring_obj = NULL;
2147 ring->use_doorbell = true;
2148 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2149 ring->me = 1; /* first MEC */
2150 ring->pipe = i / 8;
2151 ring->queue = i % 8;
2152		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2153 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2154 /* type-2 packets are deprecated on MEC, use type-3 instead */
2155 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2156 irq_type);
2157 if (r)
2158 return r;
2159 }
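	/*
	 * Given one MEC with one pipe reserved for the driver (see
	 * gfx_v8_0_mec_init()), the mapping above resolves to me = 1,
	 * pipe = i / 8 and queue = i % 8, so the first eight compute rings
	 * all land on pipe 0, queues 0-7; the remaining pipes stay with KFD.
	 */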
2160
2161 if (amdgpu_sriov_vf(adev)) {
2162 r = gfx_v8_0_kiq_init(adev);
2163 if (r) {
2164 DRM_ERROR("Failed to init KIQ BOs!\n");
2165 return r;
2166 }
2167
2168 kiq = &adev->gfx.kiq;
2169 r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2170 if (r)
2171 return r;
2172
2173		/* create MQD for all compute queues as well as KIQ for the SRIOV case */
2174 r = gfx_v8_0_compute_mqd_soft_init(adev);
2175 if (r)
2176 return r;
2177 }
2178
2179	/* reserve GDS, GWS and OA resource for gfx */
2180 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2181 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2182 &adev->gds.gds_gfx_bo, NULL, NULL);
2183 if (r)
2184 return r;
2185
2186 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2187 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2188 &adev->gds.gws_gfx_bo, NULL, NULL);
2189 if (r)
2190 return r;
2191
2192 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2193 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2194 &adev->gds.oa_gfx_bo, NULL, NULL);
2195 if (r)
2196 return r;
2197
2198 adev->gfx.ce_ram_size = 0x8000;
2199
2200 r = gfx_v8_0_gpu_early_init(adev);
2201 if (r)
2202 return r;
2203
2204 return 0;
2205}
2206
2207static int gfx_v8_0_sw_fini(void *handle)
2208{
2209 int i;
2210	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2211
2212 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2213 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2214 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2215
2216 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2217 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2218 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2219 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2220
2221 if (amdgpu_sriov_vf(adev)) {
2222 gfx_v8_0_compute_mqd_soft_fini(adev);
2223 gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2224 gfx_v8_0_kiq_fini(adev);
2225 }
2226
2227	gfx_v8_0_mec_fini(adev);
2228	gfx_v8_0_rlc_fini(adev);
2229	gfx_v8_0_free_microcode(adev);
2230
2231 return 0;
2232}
2233
2234static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2235{
2236	uint32_t *modearray, *mod2array;
2237 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2238 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2239	u32 reg_offset;
2240
2241 modearray = adev->gfx.config.tile_mode_array;
2242 mod2array = adev->gfx.config.macrotile_mode_array;
2243
2244 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2245 modearray[reg_offset] = 0;
2246
2247 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2248 mod2array[reg_offset] = 0;
2249
2250 switch (adev->asic_type) {
2251 case CHIP_TOPAZ:
2252 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2253 PIPE_CONFIG(ADDR_SURF_P2) |
2254 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2255 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2256 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2257 PIPE_CONFIG(ADDR_SURF_P2) |
2258 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2259 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2260 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2261 PIPE_CONFIG(ADDR_SURF_P2) |
2262 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2263 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2264 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2265 PIPE_CONFIG(ADDR_SURF_P2) |
2266 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2267 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2268 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2269 PIPE_CONFIG(ADDR_SURF_P2) |
2270 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2271 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2272 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2273 PIPE_CONFIG(ADDR_SURF_P2) |
2274 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2275 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2276 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2277 PIPE_CONFIG(ADDR_SURF_P2) |
2278 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2279 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2280 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2281 PIPE_CONFIG(ADDR_SURF_P2));
2282 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2283 PIPE_CONFIG(ADDR_SURF_P2) |
2284 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2286 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2287 PIPE_CONFIG(ADDR_SURF_P2) |
2288 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2290 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2291 PIPE_CONFIG(ADDR_SURF_P2) |
2292 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2294 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2295 PIPE_CONFIG(ADDR_SURF_P2) |
2296 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2299 PIPE_CONFIG(ADDR_SURF_P2) |
2300 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2302 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2303 PIPE_CONFIG(ADDR_SURF_P2) |
2304 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2306 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2307 PIPE_CONFIG(ADDR_SURF_P2) |
2308 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2310 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2311 PIPE_CONFIG(ADDR_SURF_P2) |
2312 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2315 PIPE_CONFIG(ADDR_SURF_P2) |
2316 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2319 PIPE_CONFIG(ADDR_SURF_P2) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2322 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2323 PIPE_CONFIG(ADDR_SURF_P2) |
2324 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2326 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2327 PIPE_CONFIG(ADDR_SURF_P2) |
2328 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2330 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2331 PIPE_CONFIG(ADDR_SURF_P2) |
2332 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2333 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2334 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2335 PIPE_CONFIG(ADDR_SURF_P2) |
2336 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2337 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2338 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2339 PIPE_CONFIG(ADDR_SURF_P2) |
2340 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2342 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2343 PIPE_CONFIG(ADDR_SURF_P2) |
2344 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2345 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2346 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2347 PIPE_CONFIG(ADDR_SURF_P2) |
2348 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2349 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2350 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2351 PIPE_CONFIG(ADDR_SURF_P2) |
2352 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2353 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2354
2355 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 NUM_BANKS(ADDR_SURF_8_BANK));
2359 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2362 NUM_BANKS(ADDR_SURF_8_BANK));
2363 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2366 NUM_BANKS(ADDR_SURF_8_BANK));
2367 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2370 NUM_BANKS(ADDR_SURF_8_BANK));
2371 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2374 NUM_BANKS(ADDR_SURF_8_BANK));
2375 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2378 NUM_BANKS(ADDR_SURF_8_BANK));
2379 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2382 NUM_BANKS(ADDR_SURF_8_BANK));
2383 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2386 NUM_BANKS(ADDR_SURF_16_BANK));
2387 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2390 NUM_BANKS(ADDR_SURF_16_BANK));
2391 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394 NUM_BANKS(ADDR_SURF_16_BANK));
2395 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2396 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2397 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2398 NUM_BANKS(ADDR_SURF_16_BANK));
2399 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2400 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2401 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2402 NUM_BANKS(ADDR_SURF_16_BANK));
2403 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2406 NUM_BANKS(ADDR_SURF_16_BANK));
2407 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2409 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2410 NUM_BANKS(ADDR_SURF_8_BANK));
2411
2412 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2413 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2414 reg_offset != 23)
2415 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2416
2417 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2418 if (reg_offset != 7)
2419 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2420
2421		break;
2422	case CHIP_FIJI:
2423 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2426 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2427 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2428 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2430 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2431 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2432 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2434 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2435 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2436 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2437 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2438 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2439 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2441 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2442 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2443 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2444 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2446 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2447 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2448 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2449 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2450 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2451 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2452 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2454 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2455 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2456 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2457 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2458 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2459 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2461 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2462 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2463 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2464 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2465 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2466 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2467 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2468 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2469 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2470 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2471 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2472 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2473 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2474 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2475 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2476 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2477 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2478 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2480 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2481 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2482 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2483 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2484 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2485 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2486 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2487 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2488 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2489 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2490 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2491 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2492 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2493 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2494 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2495 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2496 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2497 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2498 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2499 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2500 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2501 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2502 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2503 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2504 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2505 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2506 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2507 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2508 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2509 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2510 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2511 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2512 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2513 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2514 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2515 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2516 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2517 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2518 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2519 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2520 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2521 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2522 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2523 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2524 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2525 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2526 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2527 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2528 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2529 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2531 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2532 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2535 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2536 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2537 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2538 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2539 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2540 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2541 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2543 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2544 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2545
2546 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2548 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2549 NUM_BANKS(ADDR_SURF_8_BANK));
2550 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2552 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2553 NUM_BANKS(ADDR_SURF_8_BANK));
2554 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2556 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2557 NUM_BANKS(ADDR_SURF_8_BANK));
2558 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2560 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2561 NUM_BANKS(ADDR_SURF_8_BANK));
2562 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2563 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2564 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2565 NUM_BANKS(ADDR_SURF_8_BANK));
2566 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2568 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2569 NUM_BANKS(ADDR_SURF_8_BANK));
2570 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2572 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2573 NUM_BANKS(ADDR_SURF_8_BANK));
2574 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2576 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2577 NUM_BANKS(ADDR_SURF_8_BANK));
2578 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2580 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2581 NUM_BANKS(ADDR_SURF_8_BANK));
2582 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2583 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2584 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2585 NUM_BANKS(ADDR_SURF_8_BANK));
2586 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2588 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2589 NUM_BANKS(ADDR_SURF_8_BANK));
2590 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2591 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2592 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2593 NUM_BANKS(ADDR_SURF_8_BANK));
2594 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2595 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2596 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2597 NUM_BANKS(ADDR_SURF_8_BANK));
2598 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2600 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2601 NUM_BANKS(ADDR_SURF_4_BANK));
2602
2603 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2604 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2605
2606 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2607 if (reg_offset != 7)
2608 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2609
2610		break;
2611	case CHIP_TONGA:
2612 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2613 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2615 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2616 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2617 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2619 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2620 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2622 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2623 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2624 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2626 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2627 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2628 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2630 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2631 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2632 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2633 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2634 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2635 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2636 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2637 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2638 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2639 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2640 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2641 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2642 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2644 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2645 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2646 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2647 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2648 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2649 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2651 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2652 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2653 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2654 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2655 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2657 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2658 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2659 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2661 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2662 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2663 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2664 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2665 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2666 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2667 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2669 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2670 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2671 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2672 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2673 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2674 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2675 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2676 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2677 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2678 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2679 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2680 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2681 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2682 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2683 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2684 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2685 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2686 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2687 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2688 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2689 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2690 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2691 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2692 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2693 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2694 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2695 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2697 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2698 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2699 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2700 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2701 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2702 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2703 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2705 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2706 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2707 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2708 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2709 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2710 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2711 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2712 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2713 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2714 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2715 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2716 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2717 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2718 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2719 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2720 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2721 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2723 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2724 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2728 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2730 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2731 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2733 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2734
2735 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2737 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2738 NUM_BANKS(ADDR_SURF_16_BANK));
2739 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2741 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2742 NUM_BANKS(ADDR_SURF_16_BANK));
2743 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2745 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746 NUM_BANKS(ADDR_SURF_16_BANK));
2747 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2749 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2750 NUM_BANKS(ADDR_SURF_16_BANK));
2751 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2753 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2754 NUM_BANKS(ADDR_SURF_16_BANK));
2755 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2757 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2758 NUM_BANKS(ADDR_SURF_16_BANK));
2759 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2761 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2762 NUM_BANKS(ADDR_SURF_16_BANK));
2763 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2765 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2766 NUM_BANKS(ADDR_SURF_16_BANK));
2767 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2769 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2770 NUM_BANKS(ADDR_SURF_16_BANK));
2771 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2773 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2774 NUM_BANKS(ADDR_SURF_16_BANK));
2775 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2776 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2777 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2778 NUM_BANKS(ADDR_SURF_16_BANK));
2779 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2782 NUM_BANKS(ADDR_SURF_8_BANK));
2783 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2785 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2786 NUM_BANKS(ADDR_SURF_4_BANK));
2787 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2788 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2789 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2790 NUM_BANKS(ADDR_SURF_4_BANK));
2791
2792 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2793 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2794
2795 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2796 if (reg_offset != 7)
2797 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2798
2799		break;
2800	case CHIP_POLARIS11:
2801	case CHIP_POLARIS12:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_STONEY:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	default:
		dev_warn(adev->dev,
			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
			 adev->asic_type);

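		/* fall through */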
	case CHIP_CARRIZO:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	}
}

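/*
 * Select which SE/SH/instance subsequent indexed register accesses target
 * by programming GRBM_GFX_INDEX; passing 0xffffffff for a field selects
 * broadcast writes to all units at that level, e.g.
 * gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff).
 */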
static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
				  u32 se_num, u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32(mmGRBM_GFX_INDEX, data);
}

static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	return (u32)((1ULL << bit_width) - 1);
}

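/*
 * Return a bitmap of the render backends (RBs) that are enabled, i.e. not
 * disabled by fuses or by the user, for the currently selected SE/SH.
 */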
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_RB_BACKEND_DISABLE) |
		RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
				       adev->gfx.config.max_sh_per_se);

	return (~data) & mask;
}

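/*
 * Per-ASIC baseline values for PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1;
 * gfx_v8_0_setup_rb() adjusts these when render backends are harvested.
 */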
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}

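/*
 * Rewrite the per-SE raster configuration when some render backends are
 * harvested: remap the SE/PKR/RB map fields so that only active RBs (as
 * given by rb_mask, out of num_rb total) are referenced.
 */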
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}

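/*
 * Build the bitmap of active render backends, program the (possibly
 * harvested) raster configuration, and cache the per-SE/SH register
 * values for later consumption by userspace queries.
 */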
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
	    adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

/**
 * gfx_v8_0_init_compute_vmid - init the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the sh_mem registers used by the compute VMIDs.
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

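/*
 * Set chip-specific gfx config defaults; only the APUs (Carrizo and
 * Stoney) lack the doubled off-chip LDS buffers.
 */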
static void gfx_v8_0_config_init(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	default:
		adev->gfx.config.double_offchip_lds_buf = 1;
		break;
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		adev->gfx.config.double_offchip_lds_buf = 0;
		break;
	}
}

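/*
 * One-time gfx block setup at hw init: program the address config, the
 * tiling tables, the render backends, the per-VMID SH_MEM apertures and
 * the SC FIFO sizes.
 */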
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
					  SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
					  INDEX_STRIDE, 3);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcast
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}

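/*
 * Poll until the RLC SERDES CU masters report idle on every SE/SH, then
 * wait for the non-CU masters (SE/GC/TC0/TC1) to go idle as well.
 */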
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

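/* Enable or disable the "GUI idle" family of interrupts on gfx ring 0. */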
static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}

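/* Point the RLC at the clear state indirect buffer (CSIB). */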
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}

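/*
 * Walk the RLC register-list-format blob: record the start offset of each
 * indirect entry (entries are terminated by 0xFFFFFFFF) and replace every
 * indexed register reference with a compact index into unique_indices[].
 */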
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
					int ind_offset,
					int list_size,
					int *unique_indices,
					int *indices_count,
					int max_indices,
					int *ind_start_offsets,
					int *offset_count,
					int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
			    register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}

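/*
 * Program the RLC save/restore machine: upload the direct register
 * restore list, the re-indexed indirect list, the starting offsets and
 * the unique index/data pairs parsed from the RLC firmware.
 */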
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
	       adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				    RLC_FormatDirectRegListLength,
				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				    unique_indices,
				    &indices_count,
				    sizeof(unique_indices) / sizeof(int),
				    indirect_start_offsets,
				    &offset_count,
				    sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
	       adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
		       indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}

static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}

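/*
 * Program the RLC power-gating delays, the CP ring buffer idle poll
 * count and the GRBM register save idle threshold.
 */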
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}

static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}

static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}

}

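/*
 * Halt the RLC F32 core, mask the GUI idle interrupts and wait for the
 * SERDES masters to go idle.
 */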
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}

static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* on carrizo and the other APUs, enable the CP interrupt only after the CP is initialized */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}

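/*
 * Legacy RLC microcode load used when the SMU does not load the RLC
 * firmware for us: stream the ucode words through the
 * RLC_GPM_UCODE_ADDR/DATA register pair.
 */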
4198static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4199{
4200 const struct rlc_firmware_header_v2_0 *hdr;
4201 const __le32 *fw_data;
4202 unsigned i, fw_size;
4203
4204 if (!adev->gfx.rlc_fw)
4205 return -EINVAL;
4206
4207 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4208 amdgpu_ucode_print_rlc_hdr(&hdr->header);
aaa36a97
AD
4209
4210 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4211 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4212 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4213
4214 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4215 for (i = 0; i < fw_size; i++)
4216 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4217 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4218
4219 return 0;
4220}

static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}

static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}
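
/*
 * Legacy (non-SMU) upload of the three gfx CP engines: the prefetch
 * parser (PFP), the constant engine (CE) and the micro engine (ME).
 * Each engine has its own address/data upload aperture, and the CP is
 * halted while new microcode is written.
 */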
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
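
/*
 * Size of the clear-state indirect buffer in dwords. The per-packet
 * accounting below mirrors what gfx_v8_0_cp_gfx_start() emits: e.g.
 * 2 dwords to begin the clear state (PACKET3 header + PREAMBLE control
 * word) and 2 + reg_count dwords per SECT_CONTEXT extent (header,
 * register offset, then the register values).
 */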
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
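
/*
 * Bring up gfx ring 0. The ring size is programmed as a log2 value in
 * RB_BUFSZ, e.g. a 64KB ring gives order_base_2(65536 / 8) = 13, and
 * the read pointer is shadowed to a write-back buffer so the driver can
 * poll it without register reads.
 */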
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}
	}

	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}

static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
	}
	udelay(50);
}

static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}

static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj) {
			r = amdgpu_bo_reserve(ring->mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);

			amdgpu_bo_unpin(ring->mqd_obj);
			amdgpu_bo_unreserve(ring->mqd_obj);

			amdgpu_bo_unref(&ring->mqd_obj);
			ring->mqd_obj = NULL;
			ring->mqd_ptr = NULL;
			ring->mqd_gpu_addr = 0;
		}
	}
}

/* KIQ (kernel interface queue) functions */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}

static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
{
	amdgpu_ring_alloc(ring, 8);
	/* set resources */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(ring, 0x000000FF);	/* queue mask lo */
	amdgpu_ring_write(ring, 0);	/* queue mask hi */
	amdgpu_ring_write(ring, 0);	/* gws mask lo */
	amdgpu_ring_write(ring, 0);	/* gws mask hi */
	amdgpu_ring_write(ring, 0);	/* oac mask */
	amdgpu_ring_write(ring, 0);	/* gds heap base:0, gds heap size:0 */
	amdgpu_ring_commit(ring);
	udelay(50);
}

static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr, wptr_addr;

	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	amdgpu_ring_alloc(kiq_ring, 8);

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
	amdgpu_ring_write(kiq_ring, 0x21010000);
	amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
			  (ring->queue << 26) |
			  (ring->pipe << 29) |
			  ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	amdgpu_ring_commit(kiq_ring);
	udelay(50);
}
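
/*
 * Fill the MQD (memory queue descriptor), the in-memory image of the
 * HQD (hardware queue descriptor) registers, for a compute queue. For
 * example, assuming MEC_HPD_SIZE is 2048 bytes, the EOP_SIZE field
 * below encodes 512 dwords as order_base_2(2048 / 4) - 1 = 8, since
 * the register value means 2^(EOP_SIZE+1) dwords.
 */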
static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
			     struct vi_mqd *mqd,
			     uint64_t mqd_gpu_addr,
			     uint64_t eop_gpu_addr,
			     struct amdgpu_ring *ring)
{
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	eop_base_addr = eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);

	if (ring->use_doorbell)
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	else
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}

static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev,
				      struct vi_mqd *mqd,
				      struct amdgpu_ring *ring)
{
	uint32_t tmp;
	int j;

	/* disable wptr polling */
	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active */
	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
	}

	/* set the pointer to the MQD */
	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
				mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
				mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		if ((adev->asic_type == CHIP_CARRIZO) ||
		    (adev->asic_type == CHIP_FIJI) ||
		    (adev->asic_type == CHIP_STONEY)) {
			WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
			       AMDGPU_DOORBELL_KIQ << 2);
			WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
			       AMDGPU_DOORBELL_MEC_RING7 << 2);
		}
	}
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);

	/* set the vmid for the queue */
	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	return 0;
}
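
/*
 * Initialize one queue's MQD, either freshly (normal bring-up: build
 * the MQD under srbm_mutex and save a backup copy) or, after a GPU
 * reset, from the saved backup, in which case only the ring state is
 * cleared and the KIQ registers are reprogrammed.
 */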
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
				   struct vi_mqd *mqd,
				   u64 mqd_gpu_addr)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	uint64_t eop_gpu_addr;
	bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	if (is_kiq) {
		eop_gpu_addr = kiq->eop_gpu_addr;
		gfx_v8_0_kiq_setting(&kiq->ring);
	} else {
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
					ring->queue * MEC_HPD_SIZE;
		mqd_idx = ring - &adev->gfx.compute_ring[0];
	}

	if (!adev->gfx.in_reset) {
		memset((void *)mqd, 0, sizeof(*mqd));
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
		if (is_kiq)
			gfx_v8_0_kiq_init_register(adev, mqd, ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	} else { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);

		if (is_kiq) {
			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_kiq_init_register(adev, mqd, ring);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
	}

	if (is_kiq)
		gfx_v8_0_kiq_enable(ring);
	else
		gfx_v8_0_map_queue_enable(&kiq->ring, ring);

	return 0;
}

static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;
	if (!amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr)) {
		r = gfx_v8_0_kiq_init_queue(ring,
					    (struct vi_mqd *)ring->mqd_ptr,
					    ring->mqd_gpu_addr);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
		if (r)
			return r;
	} else {
		return r;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		if (!amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr)) {
			r = gfx_v8_0_kiq_init_queue(ring,
						    (struct vi_mqd *)ring->mqd_ptr,
						    ring->mqd_gpu_addr);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
			if (r)
				return r;
		} else {
			return r;
		}
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return 0;
}
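
/*
 * Legacy (non-KIQ) compute bring-up: the driver builds each ring's MQD
 * in a GTT buffer object and programs the HQD registers directly under
 * srbm_mutex, one queue at a time.
 */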
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the queues. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr = 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10) ||
			    (adev->asic_type == CHIP_POLARIS12)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
		    adev->asic_type == CHIP_POLARIS11 ||
		    adev->asic_type == CHIP_POLARIS10 ||
		    adev->asic_type == CHIP_POLARIS12) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}

static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
								AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	if (amdgpu_sriov_vf(adev))
		r = gfx_v8_0_kiq_resume(adev);
	else
		r = gfx_v8_0_cp_compute_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}

static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}

static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}

static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}

static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_fini(adev);
}

static int gfx_v8_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_init(adev);
}

static bool gfx_v8_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}

static int gfx_v8_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_is_idle(handle))
			return 0;

		udelay(1);
	}
	return -ETIMEDOUT;
}
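
/*
 * Inspect the GRBM/SRBM status registers and record which soft-reset
 * bits (CP, GFX, RLC, CPF/CPC/CPG, GRBM, SEM) will be needed; the
 * result is cached in adev->gfx.{grbm,srbm}_soft_reset for the
 * pre/soft/post reset callbacks below.
 */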
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}

static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int i;

	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		u32 tmp;

		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
				    DEQUEUE_REQ, 2);
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
	}
}

static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_inactive_hqd(adev, ring);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
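
/*
 * The reset itself: GFX memory clients are stalled and cleared through
 * GMCON_DEBUG around the pulse, each soft-reset register is written,
 * read back, held for ~50us and then released the same way.
 */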
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}

static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
}

static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_init_hqd(adev, ring);
		}
		gfx_v8_0_cp_compute_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}

/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
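
/*
 * Emit the per-VMID GDS (global data share), GWS (global wave sync) and
 * OA (ordered append) allocations as WRITE_DATA packets; the OA mask is
 * built as a run of oa_size bits starting at oa_base, i.e.
 * (1 << (oa_size + oa_base)) - (1 << oa_base).
 */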
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32(mmSQ_IND_DATA);
}

static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};

static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}

static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
c2546f55
AD
5803static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5804 bool enable)
62a86fc2 5805{
c4642a47
JZ
5806 if ((adev->asic_type == CHIP_POLARIS11) ||
5807 (adev->asic_type == CHIP_POLARIS12))
c2546f55
AD
5808 /* Send msg to SMU via Powerplay */
5809 amdgpu_set_powergating_state(adev,
5810 AMD_IP_BLOCK_TYPE_SMC,
5811 enable ?
5812 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
62a86fc2 5813
61cb8cef 5814 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
62a86fc2
EH
5815}
5816
c2546f55
AD
5817static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5818 bool enable)
62a86fc2 5819{
61cb8cef 5820 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
62a86fc2
EH
5821}
5822
2cc0c0b5 5823static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
62a86fc2
EH
5824 bool enable)
5825{
61cb8cef 5826 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
62a86fc2
EH
5827}
5828
2c547165
AD
5829static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5830 bool enable)
5831{
61cb8cef 5832 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
2c547165
AD
5833}
5834
5835static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5836 bool enable)
5837{
61cb8cef 5838 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
2c547165
AD
5839
5840 /* Read any GFX register to wake up GFX. */
5841 if (!enable)
61cb8cef 5842 RREG32(mmDB_RENDER_CONTROL);
2c547165
AD
5843}
5844
5845static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5846 bool enable)
5847{
5848 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5849 cz_enable_gfx_cg_power_gating(adev, true);
5850 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5851 cz_enable_gfx_pipeline_power_gating(adev, true);
5852 } else {
5853 cz_enable_gfx_cg_power_gating(adev, false);
5854 cz_enable_gfx_pipeline_power_gating(adev, false);
5855 }
5856}
5857
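/*
 * set_powergating_state is the common entry point for the PG state
 * machine.  SR-IOV VFs bail out early (power management is presumably
 * owned by the host); otherwise the per-ASIC pg_flags decide which
 * features follow the requested gate/ungate state.
 */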
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}

	return 0;
}

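/*
 * Report which clockgating features are currently live by sampling the
 * override/enable bits in the relevant registers and translating them
 * back into AMD_CG_SUPPORT_* flags for the caller.
 */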
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}

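/*
 * Broadcast a BPM command over the RLC serdes links: select all SEs/SHs,
 * open the CU and non-CU master masks, then write the command and target
 * BPM register address into RLC_SERDES_WR_CTRL.  Stoney clears a smaller
 * set of control fields than the other VI parts.
 */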
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}

#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e

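/*
 * RLC safe mode brackets the clock/power-gating reprogramming sequences
 * below.  Entering it runs a CMD/MESSAGE handshake through RLC_SAFE_MODE
 * (only when CGCG/MGCG is enabled) and then polls RLC_GPM_STAT until
 * both the gfx clock and power status report idle, so register writes
 * don't race the RLC firmware.
 */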
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}

static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};

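/*
 * Medium grain clockgating (MGCG) plus the related memory light-sleep
 * features.  On enable: turn on RLC/CP memory light sleep, clear the
 * MGCG overrides, then program the CGTS (tree shade) controls.  On
 * disable the same steps run in reverse with the overrides set.  Every
 * phase is bracketed by serdes-idle waits inside RLC safe mode.
 */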
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

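/*
 * Coarse grain clockgating (CGCG/CGLS).  Enable clears the CGCG
 * override, arms CGLS through a BPM serdes command and finally sets the
 * enable bits in RLC_CGCG_CGLS_CTRL; disable first wakes the block with
 * dummy reads, re-asserts the overrides and clears both enable bits.
 */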
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to set CGCG override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * === MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * === CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}

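/*
 * On Tonga and the Polaris parts clockgating is owned by the SMU: the
 * driver packs the block, the supported states and the requested state
 * into a PP_CG_MSG_ID and hands it to powerplay instead of programming
 * the gating registers itself.
 */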
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
						  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
	void *pp_handle = adev->powerplay.pp_handle;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_CG,
				      pp_support_state,
				      pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_MG,
				      pp_support_state,
				      pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	return 0;
}

static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
						    enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
	void *pp_handle = adev->powerplay.pp_handle;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_CG,
				      pp_support_state,
				      pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_3D,
				      pp_support_state,
				      pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_MG,
				      pp_support_state,
				      pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_RLC,
				      pp_support_state,
				      pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_CP,
				      pp_support_state,
				      pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	return 0;
}

static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}

static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}

static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		/* XXX check if swapping is necessary on BE */
		return ring->adev->wb.wb[ring->wptr_offs];
	else
		return RREG32(mmCP_RB0_WPTR);
}

static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}

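/*
 * HDP flush: a WAIT_REG_MEM packet writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until the ring's bit reads back.  Compute and KIQ
 * rings derive their CPn bit from me/pipe; the gfx ring uses CP0 and
 * runs the packet on the PFP engine.
 */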
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}

static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
			  EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
			  EVENT_INDEX(0));
}

static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);
}

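/*
 * IB emission.  Gfx IBs use INDIRECT_BUFFER_CONST when the CE flag is
 * set and carry the VMID shifted into bits 24+ of the control dword;
 * compute IBs always use INDIRECT_BUFFER with INDIRECT_BUFFER_VALID
 * or'ed in.
 */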
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

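/*
 * Pipeline sync: stall the ring (PFP on gfx, ME on compute) with a
 * WAIT_REG_MEM on the fence address until the last emitted fence value
 * lands, so work queued behind this point sees prior results.
 */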
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

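/*
 * VM flush: write the page directory base into the per-VMID register
 * (VMIDs 0-7 and 8-15 live in separate register ranges), invalidate the
 * VM context and wait for the request bit to clear.  On gfx rings the
 * PFP is then synced to the ME, and 128 NOPs keep the CE from touching
 * the VM before the flush completes.
 */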
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
		amdgpu_ring_insert_nop(ring, 128);
	}
}

static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

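/*
 * KIQ fences are plain WRITE_DATA packets (only a 32-bit sequence slot
 * is allocated for the KIQ), optionally followed by a write to
 * CPC_INT_STATUS that raises the GENERIC2 interrupt (src_id 178).
 */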
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

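/*
 * CONTEXT_CONTROL: builds the load bits that tell the CP which state to
 * pull in on a context switch (global config, CS shader regs,
 * per-context state) and when to load CE RAM.  Under SR-IOV the packet
 * is bracketed by CE/DE metadata writes into the CSA.
 */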
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta_init(ring,
			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);

	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time a preamble
		 * is presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_de_meta_init(ring,
			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
}

static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
			  (5 << 8) |	/* dst: memory */
			  (1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	/*
	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
	 * handles the setting of interrupts for this specific pipe. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

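/*
 * EOP interrupt handler.  The IV entry's ring_id packs queue (bits 6:4),
 * me (bits 3:2) and pipe (bits 1:0): me 0 is the gfx ring, me 1/2 are
 * the MEC engines, whose entries are matched against the compute rings.
 */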
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

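/*
 * The KIQ only uses the GENERIC2 interrupt: the state setter flips the
 * enable bit both in CPC_INT_CNTL and in the CP_MEx_PIPEy_INT_CNTL
 * register behind the KIQ ring, and the handler simply processes the
 * KIQ ring's fences.
 */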
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	uint32_t tmp, target;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);

	if (ring->me == 1)
		target = mmCP_ME1_PIPE0_INT_CNTL;
	else
		target = mmCP_ME2_PIPE0_INT_CNTL;
	target += ring->pipe;

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		if (state == AMDGPU_IRQ_STATE_DISABLE) {
			tmp = RREG32(mmCPC_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
					    GENERIC2_INT_ENABLE, 0);
			WREG32(mmCPC_INT_CNTL, tmp);

			tmp = RREG32(target);
			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
					    GENERIC2_INT_ENABLE, 0);
			WREG32(target, tmp);
		} else {
			tmp = RREG32(mmCPC_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
					    GENERIC2_INT_ENABLE, 1);
			WREG32(mmCPC_INT_CNTL, tmp);

			tmp = RREG32(target);
			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
					    GENERIC2_INT_ENABLE, 1);
			WREG32(target, tmp);
		}
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}

static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};

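/*
 * Per ring-type vtables.  emit_frame_size and emit_ib_size give the
 * worst-case dword footprint of each emit helper so callers can reserve
 * ring space up front; the KIQ variant adds the rreg/wreg emitters used
 * for register access under SR-IOV.
 */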
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
		2 + /* gfx_v8_ring_emit_sb */
		3 + 4 + 29, /* gfx_v8_ring_emit_cntxcntl including vgt flush/meta-data */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

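/*
 * Walk every SE/SH pair, apply any user-supplied CU disable masks, and
 * record the active-CU bitmaps.  The first two active CUs found in each
 * SH are flagged "always on" in ao_cu_mask.
 */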
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < 16; k++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

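/*
 * CE/DE metadata writers used under SR-IOV: the payload layout depends
 * on chained-IB support, the WRITE_DATA lands at the matching offset
 * inside the CSA, and the DE path stashes a GDS backup address 4 KiB
 * past the CSA base.
 */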
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	static union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}

static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
{
	uint64_t de_payload_addr, gds_addr;
	int cnt_de;
	static union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}

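/*
 * MQDs (memory queue descriptors) hold the full CP state of a queue.
 * One BO in GTT is allocated for the KIQ and for each compute ring; the
 * kmalloc'ed copy is a CPU-side backup, presumably so queue state can
 * be restored after a reset or suspend.
 */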
/* create MQD for each compute queue */
static int gfx_v8_0_compute_mqd_soft_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r, i;

	/* create MQD for KIQ */
	ring = &adev->gfx.kiq.ring;
	if (!ring->mqd_obj) {
		r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
					    &ring->mqd_gpu_addr, &ring->mqd_ptr);
		if (r) {
			dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
			return r;
		}

		/* prepare MQD backup */
		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
			dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
	}

	/* create MQD for each KCQ */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		if (!ring->mqd_obj) {
			r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
			if (r) {
				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
				return r;
			}

			/* prepare MQD backup */
			adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
			if (!adev->gfx.mec.mqd_backup[i])
				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
		}
	}

	return 0;
}

static void gfx_v8_0_compute_mqd_soft_fini(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int i;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		kfree(adev->gfx.mec.mqd_backup[i]);
		amdgpu_bo_free_kernel(&ring->mqd_obj,
				      &ring->mqd_gpu_addr,
				      &ring->mqd_ptr);
	}

	ring = &adev->gfx.kiq.ring;
	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
	amdgpu_bo_free_kernel(&ring->mqd_obj,
			      &ring->mqd_gpu_addr,
			      &ring->mqd_ptr);
}