/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
23 #include <linux/firmware.h>
26 #include "amdgpu_gfx.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
44 #include "gca/gfx_8_0_enum.h"
46 #include "uvd/uvd_5_0_d.h"
47 #include "uvd/uvd_5_0_sh_mask.h"
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
/* Ring counts exposed by GFX v8: one graphics ring, eight compute rings. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Golden GB_ADDR_CONFIG values per ASIC family. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Helpers that place a field value at its GB_TILE_MODE0 /
 * GB_MACROTILE_MODE0 bit position. */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-block clockgating override bits in RLC_CGTT_MGCG_OVERRIDE. */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES commands: set or clear a BPM register. */
#define	SET_BPM_SERDES_CMD	1
#define	CLE_BPM_SERDES_CMD	0
/* BPM Register Addresses */
enum bpm_reg {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};
90 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
91 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
92 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
93 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
94 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
97 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
98 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
100 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
101 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
103 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
104 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
105 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
106 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
107 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
110 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
111 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
112 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
113 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
114 MODULE_FIRMWARE("amdgpu/topaz_mec2.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
117 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
118 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
119 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
120 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
124 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
126 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
127 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
128 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
129 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
130 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
131 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
132 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
133 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
134 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
135 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
136 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
137 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
138 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
139 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
140 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
141 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
144 static const u32 golden_settings_tonga_a11[] =
146 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
147 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
148 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
149 mmGB_GPU_ID, 0x0000000f, 0x00000000,
150 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
151 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
152 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
153 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
154 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
155 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
156 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
157 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
158 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
159 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
160 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
163 static const u32 tonga_golden_common_all[] =
165 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
166 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
167 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
168 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
169 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
170 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
171 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
172 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
175 static const u32 tonga_mgcg_cgcg_init[] =
177 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
178 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
179 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
180 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
181 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
182 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
183 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
184 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
185 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
186 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
187 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
188 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
189 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
190 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
191 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
192 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
193 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
194 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
195 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
196 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
197 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
198 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
199 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
200 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
201 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
202 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
203 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
204 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
205 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
206 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
207 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
208 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
209 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
210 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
211 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
212 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
213 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
214 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
215 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
216 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
217 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
218 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
219 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
220 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
221 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
222 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
223 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
224 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
225 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
226 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
227 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
228 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
229 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
230 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
231 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
232 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
233 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
234 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
235 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
236 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
237 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
238 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
239 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
240 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
241 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
242 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
243 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
244 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
245 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
246 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
247 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
248 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
249 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
250 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
251 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
254 static const u32 fiji_golden_common_all[] =
256 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
257 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
258 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
259 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
260 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
261 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
262 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
263 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
264 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
265 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
268 static const u32 golden_settings_fiji_a10[] =
270 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
271 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
272 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
273 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
274 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
275 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
276 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
277 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
278 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
279 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
280 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
283 static const u32 fiji_mgcg_cgcg_init[] =
285 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
286 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
287 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
288 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
289 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
290 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
291 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
292 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
293 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
294 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
295 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
296 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
297 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
298 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
299 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
300 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
301 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
302 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
303 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
304 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
305 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
306 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
307 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
308 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
309 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
310 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
311 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
312 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
313 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
314 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
315 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
316 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
317 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
318 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
319 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
322 static const u32 golden_settings_iceland_a11[] =
324 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
325 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
326 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
327 mmGB_GPU_ID, 0x0000000f, 0x00000000,
328 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
329 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
330 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
331 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
332 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
333 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
334 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
335 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
336 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
337 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
338 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
341 static const u32 iceland_golden_common_all[] =
343 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
344 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
345 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
346 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
347 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
348 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
349 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
350 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
353 static const u32 iceland_mgcg_cgcg_init[] =
355 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
356 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
357 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
358 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
359 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
360 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
361 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
362 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
363 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
364 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
365 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
366 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
367 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
368 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
369 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
370 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
371 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
372 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
373 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
374 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
375 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
376 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
377 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
378 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
379 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
380 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
381 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
382 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
383 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
384 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
385 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
386 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
387 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
388 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
389 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
390 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
391 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
392 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
393 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
394 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
395 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
396 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
397 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
398 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
399 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
400 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
401 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
402 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
403 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
404 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
405 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
406 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
407 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
408 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
409 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
410 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
411 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
412 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
413 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
414 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
415 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
416 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
417 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
418 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
421 static const u32 cz_golden_settings_a11[] =
423 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
424 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
425 mmGB_GPU_ID, 0x0000000f, 0x00000000,
426 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
427 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
428 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
429 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
430 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
431 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
432 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
435 static const u32 cz_golden_common_all[] =
437 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
438 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
439 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
440 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
441 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
442 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
443 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
444 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
447 static const u32 cz_mgcg_cgcg_init[] =
449 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
450 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
451 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
452 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
453 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
454 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
455 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
456 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
457 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
458 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
459 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
460 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
461 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
462 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
463 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
464 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
465 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
466 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
467 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
468 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
469 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
470 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
471 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
472 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
473 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
474 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
475 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
476 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
477 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
478 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
479 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
480 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
481 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
482 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
483 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
484 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
485 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
486 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
487 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
488 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
489 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
490 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
491 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
492 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
493 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
494 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
495 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
496 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
497 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
498 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
499 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
500 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
501 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
502 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
503 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
504 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
505 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
506 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
507 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
508 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
509 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
510 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
511 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
512 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
513 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
514 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
515 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
516 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
517 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
518 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
519 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
520 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
521 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
522 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
523 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
526 static const u32 stoney_golden_settings_a11[] =
528 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
529 mmGB_GPU_ID, 0x0000000f, 0x00000000,
530 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
531 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
532 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
533 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
534 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
535 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
536 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
537 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
540 static const u32 stoney_golden_common_all[] =
542 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
543 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
544 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
545 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
546 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
547 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
548 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
549 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
552 static const u32 stoney_mgcg_cgcg_init[] =
554 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
555 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
556 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
557 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
558 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
559 mmATC_MISC_CG, 0xffffffff, 0x000c0200,
/* Forward declarations for setup hooks defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
566 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
568 switch (adev->asic_type) {
570 amdgpu_program_register_sequence(adev,
571 iceland_mgcg_cgcg_init,
572 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
573 amdgpu_program_register_sequence(adev,
574 golden_settings_iceland_a11,
575 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
576 amdgpu_program_register_sequence(adev,
577 iceland_golden_common_all,
578 (const u32)ARRAY_SIZE(iceland_golden_common_all));
581 amdgpu_program_register_sequence(adev,
583 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
584 amdgpu_program_register_sequence(adev,
585 golden_settings_fiji_a10,
586 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
587 amdgpu_program_register_sequence(adev,
588 fiji_golden_common_all,
589 (const u32)ARRAY_SIZE(fiji_golden_common_all));
593 amdgpu_program_register_sequence(adev,
594 tonga_mgcg_cgcg_init,
595 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
596 amdgpu_program_register_sequence(adev,
597 golden_settings_tonga_a11,
598 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
599 amdgpu_program_register_sequence(adev,
600 tonga_golden_common_all,
601 (const u32)ARRAY_SIZE(tonga_golden_common_all));
604 amdgpu_program_register_sequence(adev,
606 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
607 amdgpu_program_register_sequence(adev,
608 cz_golden_settings_a11,
609 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
610 amdgpu_program_register_sequence(adev,
611 cz_golden_common_all,
612 (const u32)ARRAY_SIZE(cz_golden_common_all));
615 amdgpu_program_register_sequence(adev,
616 stoney_mgcg_cgcg_init,
617 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
618 amdgpu_program_register_sequence(adev,
619 stoney_golden_settings_a11,
620 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
621 amdgpu_program_register_sequence(adev,
622 stoney_golden_common_all,
623 (const u32)ARRAY_SIZE(stoney_golden_common_all));
630 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
634 adev->gfx.scratch.num_reg = 7;
635 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
636 for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
637 adev->gfx.scratch.free[i] = true;
638 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
642 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
644 struct amdgpu_device *adev = ring->adev;
650 r = amdgpu_gfx_scratch_get(adev, &scratch);
652 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
655 WREG32(scratch, 0xCAFEDEAD);
656 r = amdgpu_ring_lock(ring, 3);
658 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
660 amdgpu_gfx_scratch_free(adev, scratch);
663 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
664 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
665 amdgpu_ring_write(ring, 0xDEADBEEF);
666 amdgpu_ring_unlock_commit(ring);
668 for (i = 0; i < adev->usec_timeout; i++) {
669 tmp = RREG32(scratch);
670 if (tmp == 0xDEADBEEF)
674 if (i < adev->usec_timeout) {
675 DRM_INFO("ring test on %d succeeded in %d usecs\n",
678 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
679 ring->idx, scratch, tmp);
682 amdgpu_gfx_scratch_free(adev, scratch);
686 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
688 struct amdgpu_device *adev = ring->adev;
690 struct fence *f = NULL;
696 r = amdgpu_gfx_scratch_get(adev, &scratch);
698 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
701 WREG32(scratch, 0xCAFEDEAD);
702 memset(&ib, 0, sizeof(ib));
703 r = amdgpu_ib_get(ring, NULL, 256, &ib);
705 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
708 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
709 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
710 ib.ptr[2] = 0xDEADBEEF;
713 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
714 AMDGPU_FENCE_OWNER_UNDEFINED,
719 r = fence_wait(f, false);
721 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
724 for (i = 0; i < adev->usec_timeout; i++) {
725 tmp = RREG32(scratch);
726 if (tmp == 0xDEADBEEF)
730 if (i < adev->usec_timeout) {
731 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
735 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
741 amdgpu_ib_free(adev, &ib);
743 amdgpu_gfx_scratch_free(adev, scratch);
747 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
749 const char *chip_name;
752 struct amdgpu_firmware_info *info = NULL;
753 const struct common_firmware_header *header = NULL;
754 const struct gfx_firmware_header_v1_0 *cp_hdr;
758 switch (adev->asic_type) {
766 chip_name = "carrizo";
772 chip_name = "stoney";
778 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
779 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
782 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
785 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
786 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
787 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
789 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
790 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
793 err = amdgpu_ucode_validate(adev->gfx.me_fw);
796 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
797 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
798 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
800 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
801 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
804 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
807 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
808 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
809 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
811 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
812 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
815 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
816 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
817 adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
818 adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
820 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
821 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
824 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
827 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
828 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
829 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
831 if (adev->asic_type != CHIP_STONEY) {
832 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
833 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
835 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
838 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
839 adev->gfx.mec2_fw->data;
840 adev->gfx.mec2_fw_version =
841 le32_to_cpu(cp_hdr->header.ucode_version);
842 adev->gfx.mec2_feature_version =
843 le32_to_cpu(cp_hdr->ucode_feature_version);
846 adev->gfx.mec2_fw = NULL;
850 if (adev->firmware.smu_load) {
851 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
852 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
853 info->fw = adev->gfx.pfp_fw;
854 header = (const struct common_firmware_header *)info->fw->data;
855 adev->firmware.fw_size +=
856 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
858 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
859 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
860 info->fw = adev->gfx.me_fw;
861 header = (const struct common_firmware_header *)info->fw->data;
862 adev->firmware.fw_size +=
863 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
865 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
866 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
867 info->fw = adev->gfx.ce_fw;
868 header = (const struct common_firmware_header *)info->fw->data;
869 adev->firmware.fw_size +=
870 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
872 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
873 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
874 info->fw = adev->gfx.rlc_fw;
875 header = (const struct common_firmware_header *)info->fw->data;
876 adev->firmware.fw_size +=
877 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
879 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
880 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
881 info->fw = adev->gfx.mec_fw;
882 header = (const struct common_firmware_header *)info->fw->data;
883 adev->firmware.fw_size +=
884 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
886 if (adev->gfx.mec2_fw) {
887 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
888 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
889 info->fw = adev->gfx.mec2_fw;
890 header = (const struct common_firmware_header *)info->fw->data;
891 adev->firmware.fw_size +=
892 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
900 "gfx8: Failed to load firmware \"%s\"\n",
902 release_firmware(adev->gfx.pfp_fw);
903 adev->gfx.pfp_fw = NULL;
904 release_firmware(adev->gfx.me_fw);
905 adev->gfx.me_fw = NULL;
906 release_firmware(adev->gfx.ce_fw);
907 adev->gfx.ce_fw = NULL;
908 release_firmware(adev->gfx.rlc_fw);
909 adev->gfx.rlc_fw = NULL;
910 release_firmware(adev->gfx.mec_fw);
911 adev->gfx.mec_fw = NULL;
912 release_firmware(adev->gfx.mec2_fw);
913 adev->gfx.mec2_fw = NULL;
918 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
922 if (adev->gfx.mec.hpd_eop_obj) {
923 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
924 if (unlikely(r != 0))
925 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
926 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
927 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
929 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
930 adev->gfx.mec.hpd_eop_obj = NULL;
934 #define MEC_HPD_SIZE 2048
936 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
942 * we assign only 1 pipe because all other pipes will
945 adev->gfx.mec.num_mec = 1;
946 adev->gfx.mec.num_pipe = 1;
947 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
949 if (adev->gfx.mec.hpd_eop_obj == NULL) {
950 r = amdgpu_bo_create(adev,
951 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
953 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
954 &adev->gfx.mec.hpd_eop_obj);
956 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
961 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
962 if (unlikely(r != 0)) {
963 gfx_v8_0_mec_fini(adev);
966 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
967 &adev->gfx.mec.hpd_eop_gpu_addr);
969 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
970 gfx_v8_0_mec_fini(adev);
973 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
975 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
976 gfx_v8_0_mec_fini(adev);
980 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
982 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
983 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/*
 * Hand-assembled GCN3 (VI) compute shader used by the Carrizo EDC GPR
 * workaround (see gfx_v8_0_do_edc_gpr_workarounds()). Judging by its
 * name and use, it writes an init pattern across the VGPR file; the last
 * two dwords are the end-of-shader sequence. Raw machine code — do not
 * edit by hand; reassemble from source if it must change.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
/*
 * Hand-assembled GCN3 (VI) compute shader used by the Carrizo EDC GPR
 * workaround; dispatched twice (with sgpr1_init_regs and sgpr2_init_regs)
 * by gfx_v8_0_do_edc_gpr_workarounds(). Judging by its name and use it
 * initializes the SGPR file. Raw machine code — do not edit by hand.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
/*
 * Register/value pairs (reg, value, reg, value, ...) written via
 * PACKET3_SET_SH_REG before the VGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); the emitter consumes the table two
 * entries at a time. COMPUTE_USER_DATA_* are seeded with recognizable
 * 0xedcedcNN markers.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
/*
 * Register/value pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(). Differs from vgpr_init_regs in the
 * static thread-management mask (0x0f here vs 0xf0 in sgpr2_init_regs —
 * presumably selecting complementary CU halves; confirm against the
 * COMPUTE_STATIC_THREAD_MGMT_SE0 register description) and the
 * RESOURCE_LIMITS / NUM_THREAD_X values.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
/*
 * Register/value pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(). Identical to sgpr1_init_regs except
 * the static thread-management mask is 0xf0 (the complementary half of
 * sgpr1's 0x0f).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
/*
 * GC EDC (SEC/DED) error-counter registers. Reading each register clears
 * its counter; gfx_v8_0_do_edc_gpr_workarounds() walks this table after
 * the GPR-init dispatches to reset all counts.
 *
 * NOTE(review): this list looks truncated — the extraction this chunk
 * came from appears to have dropped several entries between the ones
 * below (the full driver lists many more *_EDC_*_CNT registers, e.g.
 * CPF/CPG/GDS/SPI/SQ/TC* counters). Restore the complete table from the
 * canonical source before relying on it.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
};
1139 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1141 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1142 struct amdgpu_ib ib;
1143 struct fence *f = NULL;
1146 unsigned total_size, vgpr_offset, sgpr_offset;
1149 /* only supported on CZ */
1150 if (adev->asic_type != CHIP_CARRIZO)
1153 /* bail if the compute ring is not ready */
1157 tmp = RREG32(mmGB_EDC_MODE);
1158 WREG32(mmGB_EDC_MODE, 0);
1161 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1163 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1165 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1166 total_size = ALIGN(total_size, 256);
1167 vgpr_offset = total_size;
1168 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1169 sgpr_offset = total_size;
1170 total_size += sizeof(sgpr_init_compute_shader);
1172 /* allocate an indirect buffer to put the commands in */
1173 memset(&ib, 0, sizeof(ib));
1174 r = amdgpu_ib_get(ring, NULL, total_size, &ib);
1176 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1180 /* load the compute shaders */
1181 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1182 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1184 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1185 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1187 /* init the ib length to 0 */
1191 /* write the register state for the compute dispatch */
1192 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1193 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1194 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1195 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1197 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1198 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1199 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1200 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1201 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1202 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1204 /* write dispatch packet */
1205 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1206 ib.ptr[ib.length_dw++] = 8; /* x */
1207 ib.ptr[ib.length_dw++] = 1; /* y */
1208 ib.ptr[ib.length_dw++] = 1; /* z */
1209 ib.ptr[ib.length_dw++] =
1210 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1212 /* write CS partial flush packet */
1213 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1214 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1217 /* write the register state for the compute dispatch */
1218 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1219 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1220 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1221 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1223 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1224 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1225 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1226 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1227 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1228 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1230 /* write dispatch packet */
1231 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1232 ib.ptr[ib.length_dw++] = 8; /* x */
1233 ib.ptr[ib.length_dw++] = 1; /* y */
1234 ib.ptr[ib.length_dw++] = 1; /* z */
1235 ib.ptr[ib.length_dw++] =
1236 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1238 /* write CS partial flush packet */
1239 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1240 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1243 /* write the register state for the compute dispatch */
1244 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1245 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1246 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1247 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1249 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1250 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1251 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1252 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1253 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1254 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1256 /* write dispatch packet */
1257 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1258 ib.ptr[ib.length_dw++] = 8; /* x */
1259 ib.ptr[ib.length_dw++] = 1; /* y */
1260 ib.ptr[ib.length_dw++] = 1; /* z */
1261 ib.ptr[ib.length_dw++] =
1262 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1264 /* write CS partial flush packet */
1265 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1266 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1268 /* shedule the ib on the ring */
1269 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
1270 AMDGPU_FENCE_OWNER_UNDEFINED,
1273 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1277 /* wait for the GPU to finish processing the IB */
1278 r = fence_wait(f, false);
1280 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1284 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1285 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1286 WREG32(mmGB_EDC_MODE, tmp);
1288 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1289 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1290 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1293 /* read back registers to clear the counters */
1294 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1295 RREG32(sec_ded_counter_registers[i]);
1299 amdgpu_ib_free(adev, &ib);
/*
 * gfx_v8_0_gpu_early_init - derive the gfx configuration for this ASIC
 * @adev: amdgpu device pointer
 *
 * Fills adev->gfx.config (shader-engine / pipe / CU limits, FIFO sizes,
 * golden GB_ADDR_CONFIG) based on adev->asic_type, reads the memory
 * controller registers to compute mem_row_size_in_kb, and patches the
 * ROW_SIZE field of gb_addr_config accordingly.
 *
 * NOTE(review): this chunk was damaged during extraction — every `case`
 * label of the switches below, some local declarations (at least
 * `u32 gb_addr_config;` and `u32 tmp;`, both used later), the `break;`
 * statements, and some `else`/closing-brace lines are missing. Only
 * comments were added here; the missing tokens must be restored from the
 * canonical driver source before this compiles. Inline notes mark each gap.
 */
static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	/* NOTE(review): declarations of gb_addr_config and tmp dropped here */
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;

	switch (adev->asic_type) {
	/* NOTE(review): case label missing — the TOPAZ golden value below
	 * suggests this is the CHIP_TOPAZ branch; confirm. */
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
	/* NOTE(review): break + case label missing — 4 SEs / 16 pipes /
	 * 16 CUs looks like the CHIP_FIJI branch; confirm. */
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
	/* NOTE(review): break + case label missing — 4 SEs / 8 pipes /
	 * 8 CUs matches the CHIP_TONGA branch; confirm. */
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
	/* NOTE(review): break + case label missing — APU with a PCI-revision
	 * sub-switch and CARRIZO golden config: presumably CHIP_CARRIZO. */
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count varies by Carrizo SKU (PCI revision id) */
		switch (adev->pdev->revision) {
		/* NOTE(review): the revision case labels (and break;s) for all
		 * four CU-count tiers below were dropped — restore them; do not
		 * guess the hex revision values. */
			adev->gfx.config.max_cu_per_sh = 8;
			adev->gfx.config.max_cu_per_sh = 6;
			adev->gfx.config.max_cu_per_sh = 6;
			adev->gfx.config.max_cu_per_sh = 4;
		}
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
	/* NOTE(review): break + case label missing — 1 backend / 16 GS
	 * threads with CARRIZO golden config: presumably CHIP_STONEY. */
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count varies by SKU (PCI revision id) */
		switch (adev->pdev->revision) {
		/* NOTE(review): revision case labels (and break;s) for the two
		 * CU-count tiers below were dropped — restore; do not guess. */
			adev->gfx.config.max_cu_per_sh = 3;
			adev->gfx.config.max_cu_per_sh = 2;
		}
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
	/* NOTE(review): break + label missing — likely the `default:` branch. */
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		/* NOTE(review): an `else` line appears dropped before this */
			adev->gfx.config.mem_row_size_in_kb = 1;
	/* NOTE(review): a `} else {` line appears dropped here (dGPU path) */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	/* NOTE(review): case labels (presumably 1/2/4 KB) and break;s were
	 * dropped around the three ROW_SIZE assignments below. */
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;
}
1530 static int gfx_v8_0_sw_init(void *handle)
1533 struct amdgpu_ring *ring;
1534 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1537 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1541 /* Privileged reg */
1542 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1546 /* Privileged inst */
1547 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1551 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1553 gfx_v8_0_scratch_init(adev);
1555 r = gfx_v8_0_init_microcode(adev);
1557 DRM_ERROR("Failed to load gfx firmware!\n");
1561 r = gfx_v8_0_mec_init(adev);
1563 DRM_ERROR("Failed to init MEC BOs!\n");
1567 /* set up the gfx ring */
1568 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1569 ring = &adev->gfx.gfx_ring[i];
1570 ring->ring_obj = NULL;
1571 sprintf(ring->name, "gfx");
1572 /* no gfx doorbells on iceland */
1573 if (adev->asic_type != CHIP_TOPAZ) {
1574 ring->use_doorbell = true;
1575 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1578 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1579 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1580 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1581 AMDGPU_RING_TYPE_GFX);
1586 /* set up the compute queues */
1587 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1590 /* max 32 queues per MEC */
1591 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1592 DRM_ERROR("Too many (%d) compute rings!\n", i);
1595 ring = &adev->gfx.compute_ring[i];
1596 ring->ring_obj = NULL;
1597 ring->use_doorbell = true;
1598 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1599 ring->me = 1; /* first MEC */
1601 ring->queue = i % 8;
1602 sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
1603 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1604 /* type-2 packets are deprecated on MEC, use type-3 instead */
1605 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1606 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1607 &adev->gfx.eop_irq, irq_type,
1608 AMDGPU_RING_TYPE_COMPUTE);
1613 /* reserve GDS, GWS and OA resource for gfx */
1614 r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1616 AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1617 NULL, &adev->gds.gds_gfx_bo);
1621 r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1623 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1624 NULL, &adev->gds.gws_gfx_bo);
1628 r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1630 AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1631 NULL, &adev->gds.oa_gfx_bo);
1635 adev->gfx.ce_ram_size = 0x8000;
1637 gfx_v8_0_gpu_early_init(adev);
1642 static int gfx_v8_0_sw_fini(void *handle)
1645 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1647 amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1648 amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1649 amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1651 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1652 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1653 for (i = 0; i < adev->gfx.num_compute_rings; i++)
1654 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1656 gfx_v8_0_mec_fini(adev);
1661 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1663 uint32_t *modearray, *mod2array;
1664 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1665 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1668 modearray = adev->gfx.config.tile_mode_array;
1669 mod2array = adev->gfx.config.macrotile_mode_array;
1671 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1672 modearray[reg_offset] = 0;
1674 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1675 mod2array[reg_offset] = 0;
1677 switch (adev->asic_type) {
1679 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1680 PIPE_CONFIG(ADDR_SURF_P2) |
1681 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1682 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1683 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1684 PIPE_CONFIG(ADDR_SURF_P2) |
1685 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1686 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1687 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1688 PIPE_CONFIG(ADDR_SURF_P2) |
1689 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1690 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1691 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1692 PIPE_CONFIG(ADDR_SURF_P2) |
1693 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1694 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1695 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1696 PIPE_CONFIG(ADDR_SURF_P2) |
1697 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1698 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1699 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1700 PIPE_CONFIG(ADDR_SURF_P2) |
1701 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1702 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1703 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1704 PIPE_CONFIG(ADDR_SURF_P2) |
1705 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1706 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1707 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1708 PIPE_CONFIG(ADDR_SURF_P2));
1709 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1710 PIPE_CONFIG(ADDR_SURF_P2) |
1711 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1712 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1713 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1714 PIPE_CONFIG(ADDR_SURF_P2) |
1715 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1716 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1717 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1718 PIPE_CONFIG(ADDR_SURF_P2) |
1719 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1720 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1721 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1722 PIPE_CONFIG(ADDR_SURF_P2) |
1723 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1724 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1725 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1726 PIPE_CONFIG(ADDR_SURF_P2) |
1727 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1728 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1729 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1730 PIPE_CONFIG(ADDR_SURF_P2) |
1731 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1732 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1733 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1734 PIPE_CONFIG(ADDR_SURF_P2) |
1735 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1736 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1737 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1738 PIPE_CONFIG(ADDR_SURF_P2) |
1739 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1740 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1741 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1742 PIPE_CONFIG(ADDR_SURF_P2) |
1743 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1744 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1745 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1746 PIPE_CONFIG(ADDR_SURF_P2) |
1747 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1748 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1749 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1750 PIPE_CONFIG(ADDR_SURF_P2) |
1751 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1752 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1753 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1754 PIPE_CONFIG(ADDR_SURF_P2) |
1755 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1756 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1757 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1758 PIPE_CONFIG(ADDR_SURF_P2) |
1759 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1760 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1761 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1762 PIPE_CONFIG(ADDR_SURF_P2) |
1763 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1764 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1765 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1766 PIPE_CONFIG(ADDR_SURF_P2) |
1767 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1768 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1769 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1770 PIPE_CONFIG(ADDR_SURF_P2) |
1771 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1772 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1773 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1774 PIPE_CONFIG(ADDR_SURF_P2) |
1775 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1776 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1777 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1778 PIPE_CONFIG(ADDR_SURF_P2) |
1779 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1780 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1782 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1783 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1784 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1785 NUM_BANKS(ADDR_SURF_8_BANK));
1786 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1787 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1788 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1789 NUM_BANKS(ADDR_SURF_8_BANK));
1790 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1791 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1792 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1793 NUM_BANKS(ADDR_SURF_8_BANK));
1794 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1795 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1796 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1797 NUM_BANKS(ADDR_SURF_8_BANK));
1798 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1799 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1800 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1801 NUM_BANKS(ADDR_SURF_8_BANK));
1802 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1803 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1804 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1805 NUM_BANKS(ADDR_SURF_8_BANK));
1806 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1807 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1808 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1809 NUM_BANKS(ADDR_SURF_8_BANK));
1810 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1811 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1812 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1813 NUM_BANKS(ADDR_SURF_16_BANK));
1814 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1815 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1816 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1817 NUM_BANKS(ADDR_SURF_16_BANK));
1818 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1819 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1820 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1821 NUM_BANKS(ADDR_SURF_16_BANK));
1822 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1823 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1824 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1825 NUM_BANKS(ADDR_SURF_16_BANK));
1826 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1827 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1828 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1829 NUM_BANKS(ADDR_SURF_16_BANK));
1830 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1831 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1832 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1833 NUM_BANKS(ADDR_SURF_16_BANK));
1834 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1835 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1836 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1837 NUM_BANKS(ADDR_SURF_8_BANK));
1839 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1840 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1842 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1844 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1845 if (reg_offset != 7)
1846 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1850 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1851 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1852 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1853 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1854 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1855 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1856 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1857 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1858 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1859 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1860 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1861 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1862 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1863 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1864 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1865 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1866 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1867 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1868 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1869 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1870 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1871 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1872 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1873 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1874 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1875 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1876 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1877 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1878 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1879 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1880 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1881 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1882 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1883 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1884 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1885 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1886 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1887 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1888 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1889 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1890 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1891 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1892 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1893 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1894 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1895 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1896 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1897 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1898 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1899 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1900 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1901 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1902 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1903 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1904 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1905 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1906 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1907 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1908 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1909 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1910 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1911 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1912 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1913 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1914 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1915 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1916 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1917 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1918 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1919 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1920 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1921 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1922 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1923 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1924 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1925 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1926 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1927 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1928 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1929 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1930 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1931 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1932 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1933 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1934 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1935 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1936 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1937 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1938 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1939 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1940 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1941 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1942 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1943 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1944 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1945 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1946 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1947 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1948 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1949 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1950 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1951 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1952 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1953 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1954 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1955 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1956 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1957 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1958 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1959 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1960 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1961 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1962 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1963 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1964 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1965 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1966 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1967 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1968 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1969 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1970 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1971 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1973 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1974 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1975 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1976 NUM_BANKS(ADDR_SURF_8_BANK));
1977 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1978 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1979 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1980 NUM_BANKS(ADDR_SURF_8_BANK));
1981 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1982 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1983 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1984 NUM_BANKS(ADDR_SURF_8_BANK));
1985 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1986 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1987 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1988 NUM_BANKS(ADDR_SURF_8_BANK));
1989 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1990 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1991 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1992 NUM_BANKS(ADDR_SURF_8_BANK));
1993 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1994 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1995 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1996 NUM_BANKS(ADDR_SURF_8_BANK));
1997 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1998 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1999 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2000 NUM_BANKS(ADDR_SURF_8_BANK));
2001 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2002 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2003 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2004 NUM_BANKS(ADDR_SURF_8_BANK));
2005 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2006 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2007 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2008 NUM_BANKS(ADDR_SURF_8_BANK));
2009 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2010 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2011 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2012 NUM_BANKS(ADDR_SURF_8_BANK));
2013 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2014 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2015 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2016 NUM_BANKS(ADDR_SURF_8_BANK));
2017 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2018 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2019 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2020 NUM_BANKS(ADDR_SURF_8_BANK));
2021 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2022 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2023 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2024 NUM_BANKS(ADDR_SURF_8_BANK));
2025 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2026 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2027 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2028 NUM_BANKS(ADDR_SURF_4_BANK));
2030 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2031 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2033 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2034 if (reg_offset != 7)
2035 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2039 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2040 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2041 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2042 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2043 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2044 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2045 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2046 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2047 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2048 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2049 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2050 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2051 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2052 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2053 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2054 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2055 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2056 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2057 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2058 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2059 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2060 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2061 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2062 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2063 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2064 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2065 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2066 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2067 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2068 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2069 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2070 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2071 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2072 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2073 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2074 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2075 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2078 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2079 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2080 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2081 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2082 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2083 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2085 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2086 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2087 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2088 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2089 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2090 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2091 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2094 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2095 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2096 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2097 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2098 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2099 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2100 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2101 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2102 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2103 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2105 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2106 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2107 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2108 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2109 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2110 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2111 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2112 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2113 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2114 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2115 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2116 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2117 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2118 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2119 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2120 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2121 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2122 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2123 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2124 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2125 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2126 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2127 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2128 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2129 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2130 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2131 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2132 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2133 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2134 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2135 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2136 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2137 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2138 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2139 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2141 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2142 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2143 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2144 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2145 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2146 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2147 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2148 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2149 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2150 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2151 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2152 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2153 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2154 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2155 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2156 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2157 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2158 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2159 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2160 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2162 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2163 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2164 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2165 NUM_BANKS(ADDR_SURF_16_BANK));
2166 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2167 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2168 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2169 NUM_BANKS(ADDR_SURF_16_BANK));
2170 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2171 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2172 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2173 NUM_BANKS(ADDR_SURF_16_BANK));
2174 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2175 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2176 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2177 NUM_BANKS(ADDR_SURF_16_BANK));
2178 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2179 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2180 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2181 NUM_BANKS(ADDR_SURF_16_BANK));
2182 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2183 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2184 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2185 NUM_BANKS(ADDR_SURF_16_BANK));
2186 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2187 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2188 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2189 NUM_BANKS(ADDR_SURF_16_BANK));
2190 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2191 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2192 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2193 NUM_BANKS(ADDR_SURF_16_BANK));
2194 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2197 NUM_BANKS(ADDR_SURF_16_BANK));
2198 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2199 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2200 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2201 NUM_BANKS(ADDR_SURF_16_BANK));
2202 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2203 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2204 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2205 NUM_BANKS(ADDR_SURF_16_BANK));
2206 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2208 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2209 NUM_BANKS(ADDR_SURF_8_BANK));
2210 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2211 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2212 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2213 NUM_BANKS(ADDR_SURF_4_BANK));
2214 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2215 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2216 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2217 NUM_BANKS(ADDR_SURF_4_BANK));
2219 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2220 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2222 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2223 if (reg_offset != 7)
2224 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2228 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229 PIPE_CONFIG(ADDR_SURF_P2) |
2230 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2231 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233 PIPE_CONFIG(ADDR_SURF_P2) |
2234 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2235 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237 PIPE_CONFIG(ADDR_SURF_P2) |
2238 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2239 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2240 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2241 PIPE_CONFIG(ADDR_SURF_P2) |
2242 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2243 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2244 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2245 PIPE_CONFIG(ADDR_SURF_P2) |
2246 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2247 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2248 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249 PIPE_CONFIG(ADDR_SURF_P2) |
2250 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2251 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2252 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2253 PIPE_CONFIG(ADDR_SURF_P2) |
2254 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2255 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2256 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2257 PIPE_CONFIG(ADDR_SURF_P2));
2258 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2259 PIPE_CONFIG(ADDR_SURF_P2) |
2260 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2263 PIPE_CONFIG(ADDR_SURF_P2) |
2264 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2266 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2267 PIPE_CONFIG(ADDR_SURF_P2) |
2268 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2270 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2271 PIPE_CONFIG(ADDR_SURF_P2) |
2272 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2274 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2275 PIPE_CONFIG(ADDR_SURF_P2) |
2276 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2279 PIPE_CONFIG(ADDR_SURF_P2) |
2280 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2283 PIPE_CONFIG(ADDR_SURF_P2) |
2284 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2286 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2287 PIPE_CONFIG(ADDR_SURF_P2) |
2288 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2291 PIPE_CONFIG(ADDR_SURF_P2) |
2292 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2295 PIPE_CONFIG(ADDR_SURF_P2) |
2296 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2299 PIPE_CONFIG(ADDR_SURF_P2) |
2300 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2303 PIPE_CONFIG(ADDR_SURF_P2) |
2304 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2307 PIPE_CONFIG(ADDR_SURF_P2) |
2308 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2311 PIPE_CONFIG(ADDR_SURF_P2) |
2312 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2315 PIPE_CONFIG(ADDR_SURF_P2) |
2316 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2319 PIPE_CONFIG(ADDR_SURF_P2) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2322 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323 PIPE_CONFIG(ADDR_SURF_P2) |
2324 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2327 PIPE_CONFIG(ADDR_SURF_P2) |
2328 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2331 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2333 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2334 NUM_BANKS(ADDR_SURF_8_BANK));
2335 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2337 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2338 NUM_BANKS(ADDR_SURF_8_BANK));
2339 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2341 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342 NUM_BANKS(ADDR_SURF_8_BANK));
2343 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2346 NUM_BANKS(ADDR_SURF_8_BANK));
2347 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2350 NUM_BANKS(ADDR_SURF_8_BANK));
2351 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354 NUM_BANKS(ADDR_SURF_8_BANK));
2355 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 NUM_BANKS(ADDR_SURF_8_BANK));
2359 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2362 NUM_BANKS(ADDR_SURF_16_BANK));
2363 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2366 NUM_BANKS(ADDR_SURF_16_BANK));
2367 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2370 NUM_BANKS(ADDR_SURF_16_BANK));
2371 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2374 NUM_BANKS(ADDR_SURF_16_BANK));
2375 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2378 NUM_BANKS(ADDR_SURF_16_BANK));
2379 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2382 NUM_BANKS(ADDR_SURF_16_BANK));
2383 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2386 NUM_BANKS(ADDR_SURF_8_BANK));
2388 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2389 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2391 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2393 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2394 if (reg_offset != 7)
2395 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2400 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2404 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2405 PIPE_CONFIG(ADDR_SURF_P2) |
2406 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2407 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409 PIPE_CONFIG(ADDR_SURF_P2) |
2410 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2411 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2412 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413 PIPE_CONFIG(ADDR_SURF_P2) |
2414 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2415 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2416 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 PIPE_CONFIG(ADDR_SURF_P2) |
2418 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2419 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421 PIPE_CONFIG(ADDR_SURF_P2) |
2422 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2423 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425 PIPE_CONFIG(ADDR_SURF_P2) |
2426 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2427 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2429 PIPE_CONFIG(ADDR_SURF_P2) |
2430 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2431 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2432 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2433 PIPE_CONFIG(ADDR_SURF_P2));
2434 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2435 PIPE_CONFIG(ADDR_SURF_P2) |
2436 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439 PIPE_CONFIG(ADDR_SURF_P2) |
2440 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2443 PIPE_CONFIG(ADDR_SURF_P2) |
2444 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2446 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2447 PIPE_CONFIG(ADDR_SURF_P2) |
2448 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2449 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2450 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451 PIPE_CONFIG(ADDR_SURF_P2) |
2452 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2453 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2455 PIPE_CONFIG(ADDR_SURF_P2) |
2456 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2457 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2459 PIPE_CONFIG(ADDR_SURF_P2) |
2460 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2462 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2463 PIPE_CONFIG(ADDR_SURF_P2) |
2464 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2465 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2466 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2467 PIPE_CONFIG(ADDR_SURF_P2) |
2468 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2470 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2471 PIPE_CONFIG(ADDR_SURF_P2) |
2472 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2473 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2474 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2475 PIPE_CONFIG(ADDR_SURF_P2) |
2476 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2477 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2478 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2479 PIPE_CONFIG(ADDR_SURF_P2) |
2480 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2481 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2482 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2483 PIPE_CONFIG(ADDR_SURF_P2) |
2484 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2485 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2486 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2487 PIPE_CONFIG(ADDR_SURF_P2) |
2488 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2489 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2491 PIPE_CONFIG(ADDR_SURF_P2) |
2492 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2493 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2494 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2495 PIPE_CONFIG(ADDR_SURF_P2) |
2496 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2497 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2498 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499 PIPE_CONFIG(ADDR_SURF_P2) |
2500 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2501 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2503 PIPE_CONFIG(ADDR_SURF_P2) |
2504 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2505 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2507 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2510 NUM_BANKS(ADDR_SURF_8_BANK));
2511 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2514 NUM_BANKS(ADDR_SURF_8_BANK));
2515 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2518 NUM_BANKS(ADDR_SURF_8_BANK));
2519 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522 NUM_BANKS(ADDR_SURF_8_BANK));
2523 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2526 NUM_BANKS(ADDR_SURF_8_BANK));
2527 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2530 NUM_BANKS(ADDR_SURF_8_BANK));
2531 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2533 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2534 NUM_BANKS(ADDR_SURF_8_BANK));
2535 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2536 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2537 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2538 NUM_BANKS(ADDR_SURF_16_BANK));
2539 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2540 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2542 NUM_BANKS(ADDR_SURF_16_BANK));
2543 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2544 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2546 NUM_BANKS(ADDR_SURF_16_BANK));
2547 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2548 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2550 NUM_BANKS(ADDR_SURF_16_BANK));
2551 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2554 NUM_BANKS(ADDR_SURF_16_BANK));
2555 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2557 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2558 NUM_BANKS(ADDR_SURF_16_BANK));
2559 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2561 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2562 NUM_BANKS(ADDR_SURF_8_BANK));
2564 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2565 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2567 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2569 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2570 if (reg_offset != 7)
2571 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2577 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2579 return (u32)((1ULL << bit_width) - 1);
2582 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2584 u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2586 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2587 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2588 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2589 } else if (se_num == 0xffffffff) {
2590 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2591 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2592 } else if (sh_num == 0xffffffff) {
2593 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2594 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2596 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2597 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2599 WREG32(mmGRBM_GFX_INDEX, data);
2602 static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
2603 u32 max_rb_num_per_se,
2608 data = RREG32(mmCC_RB_BACKEND_DISABLE);
2609 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2611 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
2613 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2615 mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);
2620 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
2621 u32 se_num, u32 sh_per_se,
2622 u32 max_rb_num_per_se)
2626 u32 disabled_rbs = 0;
2627 u32 enabled_rbs = 0;
2629 mutex_lock(&adev->grbm_idx_mutex);
2630 for (i = 0; i < se_num; i++) {
2631 for (j = 0; j < sh_per_se; j++) {
2632 gfx_v8_0_select_se_sh(adev, i, j);
2633 data = gfx_v8_0_get_rb_disabled(adev,
2634 max_rb_num_per_se, sh_per_se);
2635 disabled_rbs |= data << ((i * sh_per_se + j) *
2636 RB_BITMAP_WIDTH_PER_SH);
2639 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2640 mutex_unlock(&adev->grbm_idx_mutex);
2643 for (i = 0; i < max_rb_num_per_se * se_num; i++) {
2644 if (!(disabled_rbs & mask))
2645 enabled_rbs |= mask;
2649 adev->gfx.config.backend_enable_mask = enabled_rbs;
2651 mutex_lock(&adev->grbm_idx_mutex);
2652 for (i = 0; i < se_num; i++) {
2653 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
2654 data = RREG32(mmPA_SC_RASTER_CONFIG);
2655 for (j = 0; j < sh_per_se; j++) {
2656 switch (enabled_rbs & 3) {
2659 data |= (RASTER_CONFIG_RB_MAP_3 <<
2660 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
2662 data |= (RASTER_CONFIG_RB_MAP_0 <<
2663 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
2666 data |= (RASTER_CONFIG_RB_MAP_0 <<
2667 (i * sh_per_se + j) * 2);
2670 data |= (RASTER_CONFIG_RB_MAP_3 <<
2671 (i * sh_per_se + j) * 2);
2675 data |= (RASTER_CONFIG_RB_MAP_2 <<
2676 (i * sh_per_se + j) * 2);
2681 WREG32(mmPA_SC_RASTER_CONFIG, data);
2683 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2684 mutex_unlock(&adev->grbm_idx_mutex);
2688 * gfx_v8_0_init_compute_vmid - gart enable
2690 * @rdev: amdgpu_device pointer
2692 * Initialize compute vmid sh_mem registers
2695 #define DEFAULT_SH_MEM_BASES (0x6000)
2696 #define FIRST_COMPUTE_VMID (8)
2697 #define LAST_COMPUTE_VMID (16)
2698 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
2701 uint32_t sh_mem_config;
2702 uint32_t sh_mem_bases;
2705 * Configure apertures:
2706 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
2707 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
2708 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
2710 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2712 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
2713 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
2714 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2715 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
2716 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
2717 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
2719 mutex_lock(&adev->srbm_mutex);
2720 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2721 vi_srbm_select(adev, 0, 0, 0, i);
2722 /* CP and shaders */
2723 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
2724 WREG32(mmSH_MEM_APE1_BASE, 1);
2725 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2726 WREG32(mmSH_MEM_BASES, sh_mem_bases);
2728 vi_srbm_select(adev, 0, 0, 0, 0);
2729 mutex_unlock(&adev->srbm_mutex);
2732 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
2737 tmp = RREG32(mmGRBM_CNTL);
2738 tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
2739 WREG32(mmGRBM_CNTL, tmp);
2741 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2742 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2743 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
2744 WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
2745 adev->gfx.config.gb_addr_config & 0x70);
2746 WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
2747 adev->gfx.config.gb_addr_config & 0x70);
2748 WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2749 WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2750 WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2752 gfx_v8_0_tiling_mode_table_init(adev);
2754 gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
2755 adev->gfx.config.max_sh_per_se,
2756 adev->gfx.config.max_backends_per_se);
2758 /* XXX SH_MEM regs */
2759 /* where to put LDS, scratch, GPUVM in FSA64 space */
2760 mutex_lock(&adev->srbm_mutex);
2761 for (i = 0; i < 16; i++) {
2762 vi_srbm_select(adev, 0, 0, 0, i);
2763 /* CP and shaders */
2765 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
2766 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
2767 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2768 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2769 WREG32(mmSH_MEM_CONFIG, tmp);
2771 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
2772 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
2773 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2774 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2775 WREG32(mmSH_MEM_CONFIG, tmp);
2778 WREG32(mmSH_MEM_APE1_BASE, 1);
2779 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2780 WREG32(mmSH_MEM_BASES, 0);
2782 vi_srbm_select(adev, 0, 0, 0, 0);
2783 mutex_unlock(&adev->srbm_mutex);
2785 gfx_v8_0_init_compute_vmid(adev);
2787 mutex_lock(&adev->grbm_idx_mutex);
2789 * making sure that the following register writes will be broadcasted
2790 * to all the shaders
2792 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2794 WREG32(mmPA_SC_FIFO_SIZE,
2795 (adev->gfx.config.sc_prim_fifo_size_frontend <<
2796 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2797 (adev->gfx.config.sc_prim_fifo_size_backend <<
2798 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2799 (adev->gfx.config.sc_hiz_tile_fifo_size <<
2800 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2801 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
2802 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
2803 mutex_unlock(&adev->grbm_idx_mutex);
2807 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2812 mutex_lock(&adev->grbm_idx_mutex);
2813 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2814 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2815 gfx_v8_0_select_se_sh(adev, i, j);
2816 for (k = 0; k < adev->usec_timeout; k++) {
2817 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2823 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2824 mutex_unlock(&adev->grbm_idx_mutex);
2826 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2827 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2828 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2829 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2830 for (k = 0; k < adev->usec_timeout; k++) {
2831 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2837 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2840 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2842 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2843 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2844 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2845 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2847 WREG32(mmCP_INT_CNTL_RING0, tmp);
2850 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
2852 u32 tmp = RREG32(mmRLC_CNTL);
2854 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2855 WREG32(mmRLC_CNTL, tmp);
2857 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
2859 gfx_v8_0_wait_for_rlc_serdes(adev);
2862 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
2864 u32 tmp = RREG32(mmGRBM_SOFT_RESET);
2866 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2867 WREG32(mmGRBM_SOFT_RESET, tmp);
2869 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2870 WREG32(mmGRBM_SOFT_RESET, tmp);
2874 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
2876 u32 tmp = RREG32(mmRLC_CNTL);
2878 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
2879 WREG32(mmRLC_CNTL, tmp);
2881 /* carrizo do enable cp interrupt after cp inited */
2882 if (!(adev->flags & AMD_IS_APU))
2883 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
2888 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
2890 const struct rlc_firmware_header_v2_0 *hdr;
2891 const __le32 *fw_data;
2892 unsigned i, fw_size;
2894 if (!adev->gfx.rlc_fw)
2897 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2898 amdgpu_ucode_print_rlc_hdr(&hdr->header);
2900 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2901 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2902 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2904 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
2905 for (i = 0; i < fw_size; i++)
2906 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2907 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2912 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
2916 gfx_v8_0_rlc_stop(adev);
2919 WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
2922 WREG32(mmRLC_PG_CNTL, 0);
2924 gfx_v8_0_rlc_reset(adev);
2926 if (!adev->pp_enabled) {
2927 if (!adev->firmware.smu_load) {
2928 /* legacy rlc firmware loading */
2929 r = gfx_v8_0_rlc_load_microcode(adev);
2933 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
2934 AMDGPU_UCODE_ID_RLC_G);
2940 gfx_v8_0_rlc_start(adev);
2945 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2948 u32 tmp = RREG32(mmCP_ME_CNTL);
2951 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2952 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2953 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2955 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2956 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2957 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2958 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2959 adev->gfx.gfx_ring[i].ready = false;
2961 WREG32(mmCP_ME_CNTL, tmp);
2965 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2967 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2968 const struct gfx_firmware_header_v1_0 *ce_hdr;
2969 const struct gfx_firmware_header_v1_0 *me_hdr;
2970 const __le32 *fw_data;
2971 unsigned i, fw_size;
2973 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2976 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2977 adev->gfx.pfp_fw->data;
2978 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2979 adev->gfx.ce_fw->data;
2980 me_hdr = (const struct gfx_firmware_header_v1_0 *)
2981 adev->gfx.me_fw->data;
2983 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2984 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2985 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2987 gfx_v8_0_cp_gfx_enable(adev, false);
2990 fw_data = (const __le32 *)
2991 (adev->gfx.pfp_fw->data +
2992 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2993 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2994 WREG32(mmCP_PFP_UCODE_ADDR, 0);
2995 for (i = 0; i < fw_size; i++)
2996 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2997 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3000 fw_data = (const __le32 *)
3001 (adev->gfx.ce_fw->data +
3002 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3003 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3004 WREG32(mmCP_CE_UCODE_ADDR, 0);
3005 for (i = 0; i < fw_size; i++)
3006 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3007 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3010 fw_data = (const __le32 *)
3011 (adev->gfx.me_fw->data +
3012 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3013 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3014 WREG32(mmCP_ME_RAM_WADDR, 0);
3015 for (i = 0; i < fw_size; i++)
3016 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3017 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3022 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3025 const struct cs_section_def *sect = NULL;
3026 const struct cs_extent_def *ext = NULL;
3028 /* begin clear state */
3030 /* context control state */
3033 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3034 for (ext = sect->section; ext->extent != NULL; ++ext) {
3035 if (sect->id == SECT_CONTEXT)
3036 count += 2 + ext->reg_count;
3041 /* pa_sc_raster_config/pa_sc_raster_config1 */
3043 /* end clear state */
3051 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3053 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3054 const struct cs_section_def *sect = NULL;
3055 const struct cs_extent_def *ext = NULL;
3059 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3060 WREG32(mmCP_ENDIAN_SWAP, 0);
3061 WREG32(mmCP_DEVICE_ID, 1);
3063 gfx_v8_0_cp_gfx_enable(adev, true);
3065 r = amdgpu_ring_lock(ring, gfx_v8_0_get_csb_size(adev) + 4);
3067 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3071 /* clear state buffer */
3072 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3073 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3075 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3076 amdgpu_ring_write(ring, 0x80000000);
3077 amdgpu_ring_write(ring, 0x80000000);
3079 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3080 for (ext = sect->section; ext->extent != NULL; ++ext) {
3081 if (sect->id == SECT_CONTEXT) {
3082 amdgpu_ring_write(ring,
3083 PACKET3(PACKET3_SET_CONTEXT_REG,
3085 amdgpu_ring_write(ring,
3086 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3087 for (i = 0; i < ext->reg_count; i++)
3088 amdgpu_ring_write(ring, ext->extent[i]);
3093 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3094 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3095 switch (adev->asic_type) {
3097 amdgpu_ring_write(ring, 0x16000012);
3098 amdgpu_ring_write(ring, 0x0000002A);
3101 amdgpu_ring_write(ring, 0x3a00161a);
3102 amdgpu_ring_write(ring, 0x0000002e);
3106 amdgpu_ring_write(ring, 0x00000002);
3107 amdgpu_ring_write(ring, 0x00000000);
3110 amdgpu_ring_write(ring, 0x00000000);
3111 amdgpu_ring_write(ring, 0x00000000);
3117 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3118 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3120 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3121 amdgpu_ring_write(ring, 0);
3123 /* init the CE partitions */
3124 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3125 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3126 amdgpu_ring_write(ring, 0x8000);
3127 amdgpu_ring_write(ring, 0x8000);
3129 amdgpu_ring_unlock_commit(ring);
3134 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
3136 struct amdgpu_ring *ring;
3139 u64 rb_addr, rptr_addr;
3142 /* Set the write pointer delay */
3143 WREG32(mmCP_RB_WPTR_DELAY, 0);
3145 /* set the RB to use vmid 0 */
3146 WREG32(mmCP_RB_VMID, 0);
3148 /* Set ring buffer size */
3149 ring = &adev->gfx.gfx_ring[0];
3150 rb_bufsz = order_base_2(ring->ring_size / 8);
3151 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3152 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3153 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
3154 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
3156 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3158 WREG32(mmCP_RB0_CNTL, tmp);
3160 /* Initialize the ring buffer's read and write pointers */
3161 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
3163 WREG32(mmCP_RB0_WPTR, ring->wptr);
3165 /* set the wb address wether it's enabled or not */
3166 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3167 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3168 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
3171 WREG32(mmCP_RB0_CNTL, tmp);
3173 rb_addr = ring->gpu_addr >> 8;
3174 WREG32(mmCP_RB0_BASE, rb_addr);
3175 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3177 /* no gfx doorbells on iceland */
3178 if (adev->asic_type != CHIP_TOPAZ) {
3179 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
3180 if (ring->use_doorbell) {
3181 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3182 DOORBELL_OFFSET, ring->doorbell_index);
3183 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3186 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3189 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
3191 if (adev->asic_type == CHIP_TONGA) {
3192 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3193 DOORBELL_RANGE_LOWER,
3194 AMDGPU_DOORBELL_GFX_RING0);
3195 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3197 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
3198 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3203 /* start the ring */
3204 gfx_v8_0_cp_gfx_start(adev);
3206 r = amdgpu_ring_test_ring(ring);
3208 ring->ready = false;
3215 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3220 WREG32(mmCP_MEC_CNTL, 0);
3222 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3223 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3224 adev->gfx.compute_ring[i].ready = false;
3229 static int gfx_v8_0_cp_compute_start(struct amdgpu_device *adev)
3231 gfx_v8_0_cp_compute_enable(adev, true);
3236 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3238 const struct gfx_firmware_header_v1_0 *mec_hdr;
3239 const __le32 *fw_data;
3240 unsigned i, fw_size;
3242 if (!adev->gfx.mec_fw)
3245 gfx_v8_0_cp_compute_enable(adev, false);
3247 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3248 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3250 fw_data = (const __le32 *)
3251 (adev->gfx.mec_fw->data +
3252 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3253 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3256 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3257 for (i = 0; i < fw_size; i++)
3258 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3259 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3261 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3262 if (adev->gfx.mec2_fw) {
3263 const struct gfx_firmware_header_v1_0 *mec2_hdr;
3265 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3266 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3268 fw_data = (const __le32 *)
3269 (adev->gfx.mec2_fw->data +
3270 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3271 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3273 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3274 for (i = 0; i < fw_size; i++)
3275 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3276 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
3283 uint32_t header; /* ordinal0 */
3284 uint32_t compute_dispatch_initiator; /* ordinal1 */
3285 uint32_t compute_dim_x; /* ordinal2 */
3286 uint32_t compute_dim_y; /* ordinal3 */
3287 uint32_t compute_dim_z; /* ordinal4 */
3288 uint32_t compute_start_x; /* ordinal5 */
3289 uint32_t compute_start_y; /* ordinal6 */
3290 uint32_t compute_start_z; /* ordinal7 */
3291 uint32_t compute_num_thread_x; /* ordinal8 */
3292 uint32_t compute_num_thread_y; /* ordinal9 */
3293 uint32_t compute_num_thread_z; /* ordinal10 */
3294 uint32_t compute_pipelinestat_enable; /* ordinal11 */
3295 uint32_t compute_perfcount_enable; /* ordinal12 */
3296 uint32_t compute_pgm_lo; /* ordinal13 */
3297 uint32_t compute_pgm_hi; /* ordinal14 */
3298 uint32_t compute_tba_lo; /* ordinal15 */
3299 uint32_t compute_tba_hi; /* ordinal16 */
3300 uint32_t compute_tma_lo; /* ordinal17 */
3301 uint32_t compute_tma_hi; /* ordinal18 */
3302 uint32_t compute_pgm_rsrc1; /* ordinal19 */
3303 uint32_t compute_pgm_rsrc2; /* ordinal20 */
3304 uint32_t compute_vmid; /* ordinal21 */
3305 uint32_t compute_resource_limits; /* ordinal22 */
3306 uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */
3307 uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */
3308 uint32_t compute_tmpring_size; /* ordinal25 */
3309 uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */
3310 uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */
3311 uint32_t compute_restart_x; /* ordinal28 */
3312 uint32_t compute_restart_y; /* ordinal29 */
3313 uint32_t compute_restart_z; /* ordinal30 */
3314 uint32_t compute_thread_trace_enable; /* ordinal31 */
3315 uint32_t compute_misc_reserved; /* ordinal32 */
3316 uint32_t compute_dispatch_id; /* ordinal33 */
3317 uint32_t compute_threadgroup_id; /* ordinal34 */
3318 uint32_t compute_relaunch; /* ordinal35 */
3319 uint32_t compute_wave_restore_addr_lo; /* ordinal36 */
3320 uint32_t compute_wave_restore_addr_hi; /* ordinal37 */
3321 uint32_t compute_wave_restore_control; /* ordinal38 */
3322 uint32_t reserved9; /* ordinal39 */
3323 uint32_t reserved10; /* ordinal40 */
3324 uint32_t reserved11; /* ordinal41 */
3325 uint32_t reserved12; /* ordinal42 */
3326 uint32_t reserved13; /* ordinal43 */
3327 uint32_t reserved14; /* ordinal44 */
3328 uint32_t reserved15; /* ordinal45 */
3329 uint32_t reserved16; /* ordinal46 */
3330 uint32_t reserved17; /* ordinal47 */
3331 uint32_t reserved18; /* ordinal48 */
3332 uint32_t reserved19; /* ordinal49 */
3333 uint32_t reserved20; /* ordinal50 */
3334 uint32_t reserved21; /* ordinal51 */
3335 uint32_t reserved22; /* ordinal52 */
3336 uint32_t reserved23; /* ordinal53 */
3337 uint32_t reserved24; /* ordinal54 */
3338 uint32_t reserved25; /* ordinal55 */
3339 uint32_t reserved26; /* ordinal56 */
3340 uint32_t reserved27; /* ordinal57 */
3341 uint32_t reserved28; /* ordinal58 */
3342 uint32_t reserved29; /* ordinal59 */
3343 uint32_t reserved30; /* ordinal60 */
3344 uint32_t reserved31; /* ordinal61 */
3345 uint32_t reserved32; /* ordinal62 */
3346 uint32_t reserved33; /* ordinal63 */
3347 uint32_t reserved34; /* ordinal64 */
3348 uint32_t compute_user_data_0; /* ordinal65 */
3349 uint32_t compute_user_data_1; /* ordinal66 */
3350 uint32_t compute_user_data_2; /* ordinal67 */
3351 uint32_t compute_user_data_3; /* ordinal68 */
3352 uint32_t compute_user_data_4; /* ordinal69 */
3353 uint32_t compute_user_data_5; /* ordinal70 */
3354 uint32_t compute_user_data_6; /* ordinal71 */
3355 uint32_t compute_user_data_7; /* ordinal72 */
3356 uint32_t compute_user_data_8; /* ordinal73 */
3357 uint32_t compute_user_data_9; /* ordinal74 */
3358 uint32_t compute_user_data_10; /* ordinal75 */
3359 uint32_t compute_user_data_11; /* ordinal76 */
3360 uint32_t compute_user_data_12; /* ordinal77 */
3361 uint32_t compute_user_data_13; /* ordinal78 */
3362 uint32_t compute_user_data_14; /* ordinal79 */
3363 uint32_t compute_user_data_15; /* ordinal80 */
3364 uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */
3365 uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */
3366 uint32_t reserved35; /* ordinal83 */
3367 uint32_t reserved36; /* ordinal84 */
3368 uint32_t reserved37; /* ordinal85 */
3369 uint32_t cp_mqd_query_time_lo; /* ordinal86 */
3370 uint32_t cp_mqd_query_time_hi; /* ordinal87 */
3371 uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */
3372 uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */
3373 uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */
3374 uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */
3375 uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */
3376 uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */
3377 uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */
3378 uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */
3379 uint32_t reserved38; /* ordinal96 */
3380 uint32_t reserved39; /* ordinal97 */
3381 uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */
3382 uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */
3383 uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */
3384 uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */
3385 uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */
3386 uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */
3387 uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */
3388 uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */
3389 uint32_t reserved40; /* ordinal106 */
3390 uint32_t reserved41; /* ordinal107 */
3391 uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */
3392 uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */
3393 uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */
3394 uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */
3395 uint32_t reserved42; /* ordinal112 */
3396 uint32_t reserved43; /* ordinal113 */
3397 uint32_t cp_pq_exe_status_lo; /* ordinal114 */
3398 uint32_t cp_pq_exe_status_hi; /* ordinal115 */
3399 uint32_t cp_packet_id_lo; /* ordinal116 */
3400 uint32_t cp_packet_id_hi; /* ordinal117 */
3401 uint32_t cp_packet_exe_status_lo; /* ordinal118 */
3402 uint32_t cp_packet_exe_status_hi; /* ordinal119 */
3403 uint32_t gds_save_base_addr_lo; /* ordinal120 */
3404 uint32_t gds_save_base_addr_hi; /* ordinal121 */
3405 uint32_t gds_save_mask_lo; /* ordinal122 */
3406 uint32_t gds_save_mask_hi; /* ordinal123 */
3407 uint32_t ctx_save_base_addr_lo; /* ordinal124 */
3408 uint32_t ctx_save_base_addr_hi; /* ordinal125 */
3409 uint32_t reserved44; /* ordinal126 */
3410 uint32_t reserved45; /* ordinal127 */
3411 uint32_t cp_mqd_base_addr_lo; /* ordinal128 */
3412 uint32_t cp_mqd_base_addr_hi; /* ordinal129 */
3413 uint32_t cp_hqd_active; /* ordinal130 */
3414 uint32_t cp_hqd_vmid; /* ordinal131 */
3415 uint32_t cp_hqd_persistent_state; /* ordinal132 */
3416 uint32_t cp_hqd_pipe_priority; /* ordinal133 */
3417 uint32_t cp_hqd_queue_priority; /* ordinal134 */
3418 uint32_t cp_hqd_quantum; /* ordinal135 */
3419 uint32_t cp_hqd_pq_base_lo; /* ordinal136 */
3420 uint32_t cp_hqd_pq_base_hi; /* ordinal137 */
3421 uint32_t cp_hqd_pq_rptr; /* ordinal138 */
3422 uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */
3423 uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */
3424 uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */
3425 uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */
3426 uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */
3427 uint32_t cp_hqd_pq_wptr; /* ordinal144 */
3428 uint32_t cp_hqd_pq_control; /* ordinal145 */
3429 uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */
3430 uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */
3431 uint32_t cp_hqd_ib_rptr; /* ordinal148 */
3432 uint32_t cp_hqd_ib_control; /* ordinal149 */
3433 uint32_t cp_hqd_iq_timer; /* ordinal150 */
3434 uint32_t cp_hqd_iq_rptr; /* ordinal151 */
3435 uint32_t cp_hqd_dequeue_request; /* ordinal152 */
3436 uint32_t cp_hqd_dma_offload; /* ordinal153 */
3437 uint32_t cp_hqd_sema_cmd; /* ordinal154 */
3438 uint32_t cp_hqd_msg_type; /* ordinal155 */
3439 uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */
3440 uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */
3441 uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */
3442 uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */
3443 uint32_t cp_hqd_hq_status0; /* ordinal160 */
3444 uint32_t cp_hqd_hq_control0; /* ordinal161 */
3445 uint32_t cp_mqd_control; /* ordinal162 */
3446 uint32_t cp_hqd_hq_status1; /* ordinal163 */
3447 uint32_t cp_hqd_hq_control1; /* ordinal164 */
3448 uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */
3449 uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */
3450 uint32_t cp_hqd_eop_control; /* ordinal167 */
3451 uint32_t cp_hqd_eop_rptr; /* ordinal168 */
3452 uint32_t cp_hqd_eop_wptr; /* ordinal169 */
3453 uint32_t cp_hqd_eop_done_events; /* ordinal170 */
3454 uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */
3455 uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */
3456 uint32_t cp_hqd_ctx_save_control; /* ordinal173 */
3457 uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */
3458 uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */
3459 uint32_t cp_hqd_wg_state_offset; /* ordinal176 */
3460 uint32_t cp_hqd_ctx_save_size; /* ordinal177 */
3461 uint32_t cp_hqd_gds_resource_state; /* ordinal178 */
3462 uint32_t cp_hqd_error; /* ordinal179 */
3463 uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */
3464 uint32_t cp_hqd_eop_dones; /* ordinal181 */
3465 uint32_t reserved46; /* ordinal182 */
3466 uint32_t reserved47; /* ordinal183 */
3467 uint32_t reserved48; /* ordinal184 */
3468 uint32_t reserved49; /* ordinal185 */
3469 uint32_t reserved50; /* ordinal186 */
3470 uint32_t reserved51; /* ordinal187 */
3471 uint32_t reserved52; /* ordinal188 */
3472 uint32_t reserved53; /* ordinal189 */
3473 uint32_t reserved54; /* ordinal190 */
3474 uint32_t reserved55; /* ordinal191 */
3475 uint32_t iqtimer_pkt_header; /* ordinal192 */
3476 uint32_t iqtimer_pkt_dw0; /* ordinal193 */
3477 uint32_t iqtimer_pkt_dw1; /* ordinal194 */
3478 uint32_t iqtimer_pkt_dw2; /* ordinal195 */
3479 uint32_t iqtimer_pkt_dw3; /* ordinal196 */
3480 uint32_t iqtimer_pkt_dw4; /* ordinal197 */
3481 uint32_t iqtimer_pkt_dw5; /* ordinal198 */
3482 uint32_t iqtimer_pkt_dw6; /* ordinal199 */
3483 uint32_t iqtimer_pkt_dw7; /* ordinal200 */
3484 uint32_t iqtimer_pkt_dw8; /* ordinal201 */
3485 uint32_t iqtimer_pkt_dw9; /* ordinal202 */
3486 uint32_t iqtimer_pkt_dw10; /* ordinal203 */
3487 uint32_t iqtimer_pkt_dw11; /* ordinal204 */
3488 uint32_t iqtimer_pkt_dw12; /* ordinal205 */
3489 uint32_t iqtimer_pkt_dw13; /* ordinal206 */
3490 uint32_t iqtimer_pkt_dw14; /* ordinal207 */
3491 uint32_t iqtimer_pkt_dw15; /* ordinal208 */
3492 uint32_t iqtimer_pkt_dw16; /* ordinal209 */
3493 uint32_t iqtimer_pkt_dw17; /* ordinal210 */
3494 uint32_t iqtimer_pkt_dw18; /* ordinal211 */
3495 uint32_t iqtimer_pkt_dw19; /* ordinal212 */
3496 uint32_t iqtimer_pkt_dw20; /* ordinal213 */
3497 uint32_t iqtimer_pkt_dw21; /* ordinal214 */
3498 uint32_t iqtimer_pkt_dw22; /* ordinal215 */
3499 uint32_t iqtimer_pkt_dw23; /* ordinal216 */
3500 uint32_t iqtimer_pkt_dw24; /* ordinal217 */
3501 uint32_t iqtimer_pkt_dw25; /* ordinal218 */
3502 uint32_t iqtimer_pkt_dw26; /* ordinal219 */
3503 uint32_t iqtimer_pkt_dw27; /* ordinal220 */
3504 uint32_t iqtimer_pkt_dw28; /* ordinal221 */
3505 uint32_t iqtimer_pkt_dw29; /* ordinal222 */
3506 uint32_t iqtimer_pkt_dw30; /* ordinal223 */
3507 uint32_t iqtimer_pkt_dw31; /* ordinal224 */
3508 uint32_t reserved56; /* ordinal225 */
3509 uint32_t reserved57; /* ordinal226 */
3510 uint32_t reserved58; /* ordinal227 */
3511 uint32_t set_resources_header; /* ordinal228 */
3512 uint32_t set_resources_dw1; /* ordinal229 */
3513 uint32_t set_resources_dw2; /* ordinal230 */
3514 uint32_t set_resources_dw3; /* ordinal231 */
3515 uint32_t set_resources_dw4; /* ordinal232 */
3516 uint32_t set_resources_dw5; /* ordinal233 */
3517 uint32_t set_resources_dw6; /* ordinal234 */
3518 uint32_t set_resources_dw7; /* ordinal235 */
3519 uint32_t reserved59; /* ordinal236 */
3520 uint32_t reserved60; /* ordinal237 */
3521 uint32_t reserved61; /* ordinal238 */
3522 uint32_t reserved62; /* ordinal239 */
3523 uint32_t reserved63; /* ordinal240 */
3524 uint32_t reserved64; /* ordinal241 */
3525 uint32_t reserved65; /* ordinal242 */
3526 uint32_t reserved66; /* ordinal243 */
3527 uint32_t reserved67; /* ordinal244 */
3528 uint32_t reserved68; /* ordinal245 */
3529 uint32_t reserved69; /* ordinal246 */
3530 uint32_t reserved70; /* ordinal247 */
3531 uint32_t reserved71; /* ordinal248 */
3532 uint32_t reserved72; /* ordinal249 */
3533 uint32_t reserved73; /* ordinal250 */
3534 uint32_t reserved74; /* ordinal251 */
3535 uint32_t reserved75; /* ordinal252 */
3536 uint32_t reserved76; /* ordinal253 */
3537 uint32_t reserved77; /* ordinal254 */
3538 uint32_t reserved78; /* ordinal255 */
3540 uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
/*
 * Free the per-ring MQD (memory queue descriptor) buffer objects that
 * gfx_v8_0_cp_compute_resume() allocated.  For each compute ring that
 * still owns an MQD BO: reserve it, unpin, unreserve, then drop the
 * final reference and clear the pointer so a later resume re-creates it.
 */
3543 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3547	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3548		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3550		if (ring->mqd_obj) {
			/* reserve failure is only warned about; teardown continues */
3551			r = amdgpu_bo_reserve(ring->mqd_obj, false);
3552			if (unlikely(r != 0))
3553				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3555			amdgpu_bo_unpin(ring->mqd_obj);
3556			amdgpu_bo_unreserve(ring->mqd_obj);
3558			amdgpu_bo_unref(&ring->mqd_obj);
3559			ring->mqd_obj = NULL;
/*
 * Bring up the compute (MEC) side of the command processor.
 *
 * Two phases:
 *  1. Per hardware pipe: program the EOP (end-of-pipe) buffer address and
 *     size into CP_HQD_EOP_* while the pipe is selected via SRBM.
 *  2. Per compute ring: allocate/map an MQD buffer object in GTT, fill in
 *     the vi_mqd descriptor, mirror every field into the matching CP_HQD_*
 *     register, and finally set CP_HQD_ACTIVE to start the queue.
 *
 * All SRBM-selected register programming is serialized by srbm_mutex, and
 * the selection is always restored to (0,0,0,0) afterwards.
 */
3564 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
3568	bool use_doorbell = true;
3576	/* init the pipes */
3577	mutex_lock(&adev->srbm_mutex);
3578	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 live on MEC1 (me=1), pipes 4-7 on MEC2 (me=2) */
3579		int me = (i < 4) ? 1 : 2;
3580		int pipe = (i < 4) ? i : (i - 4);
		/* each pipe gets its own MEC_HPD_SIZE slice of the EOP buffer */
3582		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
3585		vi_srbm_select(adev, me, pipe, 0, 0);
3587		/* write the EOP addr */
3588		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
3589		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
3591		/* set the VMID assigned */
3592		WREG32(mmCP_HQD_VMID, 0);
3594		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3595		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
3596		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3597				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
3598		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
3600	vi_srbm_select(adev, 0, 0, 0, 0);
3601	mutex_unlock(&adev->srbm_mutex);
3603	/* init the queues.  Just two for now. */
3604	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3605		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		/* lazily allocate the MQD BO; it survives suspend/resume cycles */
3607		if (ring->mqd_obj == NULL) {
3608			r = amdgpu_bo_create(adev,
3609					     sizeof(struct vi_mqd),
3611					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
3612					     NULL, &ring->mqd_obj);
3614				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3619		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3620		if (unlikely(r != 0)) {
3621			gfx_v8_0_cp_compute_fini(adev);
3624		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
3627			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
3628			gfx_v8_0_cp_compute_fini(adev);
3631		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
3633			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
3634			gfx_v8_0_cp_compute_fini(adev);
3638		/* init the mqd struct */
3639		memset(buf, 0, sizeof(struct vi_mqd));
3641		mqd = (struct vi_mqd *)buf;
		/* MQD header magic + all-CU static thread management masks */
3642		mqd->header = 0xC0310800;
3643		mqd->compute_pipelinestat_enable = 0x00000001;
3644		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3645		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3646		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3647		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3648		mqd->compute_misc_reserved = 0x00000003;
3650		mutex_lock(&adev->srbm_mutex);
3651		vi_srbm_select(adev, ring->me,
3655		/* disable wptr polling */
3656		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3657		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3658		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
		/* EOP base was programmed per-pipe above; read it back into the MQD */
3660		mqd->cp_hqd_eop_base_addr_lo =
3661			RREG32(mmCP_HQD_EOP_BASE_ADDR);
3662		mqd->cp_hqd_eop_base_addr_hi =
3663			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
3665		/* enable doorbell? */
3666		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3668			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3670			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
3672		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
3673		mqd->cp_hqd_pq_doorbell_control = tmp;
3675		/* disable the queue if it's active */
3676		mqd->cp_hqd_dequeue_request = 0;
3677		mqd->cp_hqd_pq_rptr = 0;
3678		mqd->cp_hqd_pq_wptr= 0;
3679		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
3680			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* busy-wait (up to usec_timeout) for the HQD to drain */
3681			for (j = 0; j < adev->usec_timeout; j++) {
3682				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
3686			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
3687			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
3688			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3691		/* set the pointer to the MQD */
3692		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
3693		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3694		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
3695		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3697		/* set MQD vmid to 0 */
3698		tmp = RREG32(mmCP_MQD_CONTROL);
3699		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3700		WREG32(mmCP_MQD_CONTROL, tmp);
3701		mqd->cp_mqd_control = tmp;
3703		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3704		hqd_gpu_addr = ring->gpu_addr >> 8;
3705		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3706		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3707		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
3708		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
3710		/* set up the HQD, this is similar to CP_RB0_CNTL */
3711		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
3712		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3713				    (order_base_2(ring->ring_size / 4) - 1));
3714		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3715			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3717		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3719		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3720		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3721		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3722		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3723		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
3724		mqd->cp_hqd_pq_control = tmp;
3726		/* set the wb address whether it's enabled or not */
3727		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3728		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3729		mqd->cp_hqd_pq_rptr_report_addr_hi =
3730			upper_32_bits(wb_gpu_addr) & 0xffff;
3731		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3732		       mqd->cp_hqd_pq_rptr_report_addr_lo);
3733		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3734		       mqd->cp_hqd_pq_rptr_report_addr_hi);
3736		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3737		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3738		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3739		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3740		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
3741		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3742		       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3744		/* enable the doorbell if requested */
		/* NOTE(review): the MEC doorbell aperture is only programmed on
		 * APU-class/Fiji parts here — presumably the others set it up
		 * elsewhere; confirm against the full file. */
3746			if ((adev->asic_type == CHIP_CARRIZO) ||
3747			    (adev->asic_type == CHIP_FIJI) ||
3748			    (adev->asic_type == CHIP_STONEY)) {
3749				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
3750				       AMDGPU_DOORBELL_KIQ << 2);
3751				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
3752				       AMDGPU_DOORBELL_MEC_RING7 << 2);
3754			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3755			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3756					    DOORBELL_OFFSET, ring->doorbell_index);
3757			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3758			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
3759			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
3760			mqd->cp_hqd_pq_doorbell_control = tmp;
3763			mqd->cp_hqd_pq_doorbell_control = 0;
3765		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
3766		       mqd->cp_hqd_pq_doorbell_control);
3768		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3770		mqd->cp_hqd_pq_wptr = ring->wptr;
3771		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3772		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3774		/* set the vmid for the queue */
3775		mqd->cp_hqd_vmid = 0;
3776		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3778		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
3779		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3780		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
3781		mqd->cp_hqd_persistent_state = tmp;
		/* Stoney additionally needs GENERIC2 interrupts on ME1 pipe 3 */
3782		if (adev->asic_type == CHIP_STONEY) {
3783			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
3784			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
3785			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
3788		/* activate the queue */
3789		mqd->cp_hqd_active = 1;
3790		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
3792		vi_srbm_select(adev, 0, 0, 0, 0);
3793		mutex_unlock(&adev->srbm_mutex);
3795		amdgpu_bo_kunmap(ring->mqd_obj);
3796		amdgpu_bo_unreserve(ring->mqd_obj);
		/* globally enable CP doorbell handling */
3800		tmp = RREG32(mmCP_PQ_STATUS);
3801		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3802		WREG32(mmCP_PQ_STATUS, tmp);
3805	r = gfx_v8_0_cp_compute_start(adev);
	/* smoke-test every compute ring; mark it unusable on failure */
3809	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3810		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3813		r = amdgpu_ring_test_ring(ring);
3815			ring->ready = false;
/*
 * Resume the whole command processor: load CP microcode (either directly
 * via MMIO on legacy paths or by waiting for the SMU to finish loading
 * CE/PFP/ME/MEC1), then bring the gfx ring and the compute queues back up
 * and re-enable the GUI idle interrupt.
 */
3821 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
	/* dGPUs keep the GUI idle interrupt off while the CP is down */
3825	if (!(adev->flags & AMD_IS_APU))
3826		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3828	if (!adev->pp_enabled) {
3829		if (!adev->firmware.smu_load) {
3830			/* legacy firmware loading */
3831			r = gfx_v8_0_cp_gfx_load_microcode(adev);
3835			r = gfx_v8_0_cp_compute_load_microcode(adev);
			/* SMU-assisted load: just confirm each CP blob landed */
3839			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3840							AMDGPU_UCODE_ID_CP_CE);
3844			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3845							AMDGPU_UCODE_ID_CP_PFP);
3849			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3850							AMDGPU_UCODE_ID_CP_ME);
3854				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3855							AMDGPU_UCODE_ID_CP_MEC1);
3861	r = gfx_v8_0_cp_gfx_resume(adev);
3865	r = gfx_v8_0_cp_compute_resume(adev);
3869	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/* Gate both CP front ends (gfx ring and compute MEC) with one call. */
3874 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
3876	gfx_v8_0_cp_gfx_enable(adev, enable);
3877	gfx_v8_0_cp_compute_enable(adev, enable);
/*
 * IP-block hw_init callback: apply golden register settings, do the base
 * GPU init, then bring up the RLC followed by the CP.
 */
3880 static int gfx_v8_0_hw_init(void *handle)
3883	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3885	gfx_v8_0_init_golden_registers(adev);
3887	gfx_v8_0_gpu_init(adev);
	/* RLC must be running before the CP is resumed */
3889	r = gfx_v8_0_rlc_resume(adev);
3893	r = gfx_v8_0_cp_resume(adev);
/*
 * IP-block hw_fini callback: halt the CP, stop the RLC, and release the
 * compute MQD buffer objects.
 */
3900 static int gfx_v8_0_hw_fini(void *handle)
3902	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3904	gfx_v8_0_cp_enable(adev, false);
3905	gfx_v8_0_rlc_stop(adev);
3906	gfx_v8_0_cp_compute_fini(adev);
/* Suspend is identical to hw_fini for this block. */
3911 static int gfx_v8_0_suspend(void *handle)
3913	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3915	return gfx_v8_0_hw_fini(adev);
/* Resume is identical to hw_init for this block. */
3918 static int gfx_v8_0_resume(void *handle)
3920	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3922	return gfx_v8_0_hw_init(adev);
/* Report GFX idleness from the GRBM_STATUS GUI_ACTIVE bit. */
3925 static bool gfx_v8_0_is_idle(void *handle)
3927	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3929	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
/*
 * Poll GRBM_STATUS.GUI_ACTIVE until the GFX block goes idle, bounded by
 * adev->usec_timeout iterations.
 */
3935 static int gfx_v8_0_wait_for_idle(void *handle)
3939	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3941	for (i = 0; i < adev->usec_timeout; i++) {
3942		/* read GRBM_STATUS and isolate the GUI_ACTIVE bit */
3943		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3945		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
/*
 * Debug helper: dump the interesting GFX 8.x registers to the kernel log.
 * Pure reads except for the SRBM/SE selection walks, both of which are
 * restored to their broadcast/default state before returning.  Called
 * from the soft-reset path to snapshot state before and after reset.
 */
3952 static void gfx_v8_0_print_status(void *handle)
3955	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3957	dev_info(adev->dev, "GFX 8.x registers\n");
3958	dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
3959		RREG32(mmGRBM_STATUS));
3960	dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
3961		RREG32(mmGRBM_STATUS2));
3962	dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3963		RREG32(mmGRBM_STATUS_SE0));
3964	dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3965		RREG32(mmGRBM_STATUS_SE1));
3966	dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3967		RREG32(mmGRBM_STATUS_SE2));
3968	dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3969		RREG32(mmGRBM_STATUS_SE3));
3970	dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
3971	dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3972		 RREG32(mmCP_STALLED_STAT1));
3973	dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3974		 RREG32(mmCP_STALLED_STAT2));
3975	dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3976		 RREG32(mmCP_STALLED_STAT3));
3977	dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3978		 RREG32(mmCP_CPF_BUSY_STAT));
3979	dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3980		 RREG32(mmCP_CPF_STALLED_STAT1));
3981	dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
3982	dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
3983	dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3984		 RREG32(mmCP_CPC_STALLED_STAT1));
3985	dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
	/* tiling tables: 32 tile modes, 16 macrotile modes */
3987	for (i = 0; i < 32; i++) {
3988		dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
3989			 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
3991	for (i = 0; i < 16; i++) {
3992		dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
3993			 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
	/* per-shader-engine raster config, then restore broadcast select */
3995	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3996		dev_info(adev->dev, "  se: %d\n", i);
3997		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
3998		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
3999			 RREG32(mmPA_SC_RASTER_CONFIG));
4000		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
4001			 RREG32(mmPA_SC_RASTER_CONFIG_1));
4003	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4005	dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
4006		 RREG32(mmGB_ADDR_CONFIG));
4007	dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
4008		 RREG32(mmHDP_ADDR_CONFIG));
4009	dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
4010		 RREG32(mmDMIF_ADDR_CALC));
4011	dev_info(adev->dev, "  SDMA0_TILING_CONFIG=0x%08X\n",
4012		 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
4013	dev_info(adev->dev, "  SDMA1_TILING_CONFIG=0x%08X\n",
4014		 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
4015	dev_info(adev->dev, "  UVD_UDEC_ADDR_CONFIG=0x%08X\n",
4016		 RREG32(mmUVD_UDEC_ADDR_CONFIG));
4017	dev_info(adev->dev, "  UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
4018		 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
4019	dev_info(adev->dev, "  UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
4020		 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
4022	dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
4023		 RREG32(mmCP_MEQ_THRESHOLDS));
4024	dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
4025		 RREG32(mmSX_DEBUG_1));
4026	dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
4027		 RREG32(mmTA_CNTL_AUX));
4028	dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
4029		 RREG32(mmSPI_CONFIG_CNTL));
4030	dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
4031		 RREG32(mmSQ_CONFIG));
4032	dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
4033		 RREG32(mmDB_DEBUG));
4034	dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
4035		 RREG32(mmDB_DEBUG2));
4036	dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
4037		 RREG32(mmDB_DEBUG3));
4038	dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
4039		 RREG32(mmCB_HW_CONTROL));
4040	dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
4041		 RREG32(mmSPI_CONFIG_CNTL_1));
4042	dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
4043		 RREG32(mmPA_SC_FIFO_SIZE));
4044	dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
4045		 RREG32(mmVGT_NUM_INSTANCES));
4046	dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
4047		 RREG32(mmCP_PERFMON_CNTL));
4048	dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
4049		 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
4050	dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
4051		 RREG32(mmVGT_CACHE_INVALIDATION));
4052	dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
4053		 RREG32(mmVGT_GS_VERTEX_REUSE));
4054	dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
4055		 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
4056	dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
4057		 RREG32(mmPA_CL_ENHANCE));
4058	dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
4059		 RREG32(mmPA_SC_ENHANCE));
4061	dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
4062		 RREG32(mmCP_ME_CNTL));
4063	dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
4064		 RREG32(mmCP_MAX_CONTEXT));
4065	dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
4066		 RREG32(mmCP_ENDIAN_SWAP));
4067	dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
4068		 RREG32(mmCP_DEVICE_ID));
4070	dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
4071		 RREG32(mmCP_SEM_WAIT_TIMER));
4073	dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
4074		 RREG32(mmCP_RB_WPTR_DELAY));
4075	dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
4076		 RREG32(mmCP_RB_VMID));
4077	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4078		 RREG32(mmCP_RB0_CNTL));
4079	dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
4080		 RREG32(mmCP_RB0_WPTR));
4081	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
4082		 RREG32(mmCP_RB0_RPTR_ADDR));
4083	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4084		 RREG32(mmCP_RB0_RPTR_ADDR_HI));
4085	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4086		 RREG32(mmCP_RB0_CNTL));
4087	dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
4088		 RREG32(mmCP_RB0_BASE));
4089	dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
4090		 RREG32(mmCP_RB0_BASE_HI));
4091	dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
4092		 RREG32(mmCP_MEC_CNTL));
4093	dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
4094		 RREG32(mmCP_CPF_DEBUG));
4096	dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
4097		 RREG32(mmSCRATCH_ADDR));
4098	dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
4099		 RREG32(mmSCRATCH_UMSK));
4101	dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
4102		 RREG32(mmCP_INT_CNTL_RING0));
4103	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4104		 RREG32(mmRLC_LB_CNTL));
4105	dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
4106		 RREG32(mmRLC_CNTL));
4107	dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
4108		 RREG32(mmRLC_CGCG_CGLS_CTRL));
4109	dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
4110		 RREG32(mmRLC_LB_CNTR_INIT));
4111	dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
4112		 RREG32(mmRLC_LB_CNTR_MAX));
4113	dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
4114		 RREG32(mmRLC_LB_INIT_CU_MASK));
4115	dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
4116		 RREG32(mmRLC_LB_PARAMS));
4117	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4118		 RREG32(mmRLC_LB_CNTL));
4119	dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
4120		 RREG32(mmRLC_MC_CNTL));
4121	dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
4122		 RREG32(mmRLC_UCODE_CNTL));
	/* per-VMID SH_MEM state, walked under srbm_mutex */
4124	mutex_lock(&adev->srbm_mutex);
4125	for (i = 0; i < 16; i++) {
4126		vi_srbm_select(adev, 0, 0, 0, i);
4127		dev_info(adev->dev, "  VM %d:\n", i);
4128		dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
4129			 RREG32(mmSH_MEM_CONFIG));
4130		dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
4131			 RREG32(mmSH_MEM_APE1_BASE));
4132		dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
4133			 RREG32(mmSH_MEM_APE1_LIMIT));
4134		dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
4135			 RREG32(mmSH_MEM_BASES));
4137	vi_srbm_select(adev, 0, 0, 0, 0);
4138	mutex_unlock(&adev->srbm_mutex);
/*
 * Soft-reset the GFX block.  Builds up GRBM/SRBM soft-reset masks from the
 * busy bits in GRBM_STATUS(2)/SRBM_STATUS, then — only if anything needs
 * resetting — stops the RLC and both CP front ends, stalls the GMCON
 * during the reset pulse, asserts and de-asserts the reset bits (each
 * write followed by a read-back to flush), and un-stalls the GMCON.
 * Register state is dumped before and after for debugging.
 */
4141 static int gfx_v8_0_soft_reset(void *handle)
4143	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4145	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	/* any fixed-function pipeline activity -> reset CP and GFX */
4148	tmp = RREG32(mmGRBM_STATUS);
4149	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4150		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4151		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4152		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4153		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4154		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4155		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4156						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4157		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4158						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	/* a busy CP additionally requires a GRBM reset via SRBM */
4161	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4162		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4163						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4164		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4165						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4169	tmp = RREG32(mmGRBM_STATUS2);
4170	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4171		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4172						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4175	tmp = RREG32(mmSRBM_STATUS);
4176	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4177		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4178						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4180	if (grbm_soft_reset || srbm_soft_reset) {
4181		gfx_v8_0_print_status((void *)adev);
		/* stop the RLC */
4183		gfx_v8_0_rlc_stop(adev);
4185		/* Disable GFX parsing/prefetching */
4186		gfx_v8_0_cp_gfx_enable(adev, false);
4188		/* Disable MEC parsing/prefetching */
4189		gfx_v8_0_cp_compute_enable(adev, false);
		/* stall the memory controller interface while resetting */
4191		if (grbm_soft_reset || srbm_soft_reset) {
4192			tmp = RREG32(mmGMCON_DEBUG);
4193			tmp = REG_SET_FIELD(tmp,
4194					    GMCON_DEBUG, GFX_STALL, 1);
4195			tmp = REG_SET_FIELD(tmp,
4196					    GMCON_DEBUG, GFX_CLEAR, 1);
4197			WREG32(mmGMCON_DEBUG, tmp);
		/* assert then release GRBM reset; read-backs flush the writes */
4202		if (grbm_soft_reset) {
4203			tmp = RREG32(mmGRBM_SOFT_RESET);
4204			tmp |= grbm_soft_reset;
4205			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4206			WREG32(mmGRBM_SOFT_RESET, tmp);
4207			tmp = RREG32(mmGRBM_SOFT_RESET);
4211			tmp &= ~grbm_soft_reset;
4212			WREG32(mmGRBM_SOFT_RESET, tmp);
4213			tmp = RREG32(mmGRBM_SOFT_RESET);
		/* same assert/release dance for the SRBM reset bits */
4216		if (srbm_soft_reset) {
4217			tmp = RREG32(mmSRBM_SOFT_RESET);
4218			tmp |= srbm_soft_reset;
4219			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4220			WREG32(mmSRBM_SOFT_RESET, tmp);
4221			tmp = RREG32(mmSRBM_SOFT_RESET);
4225			tmp &= ~srbm_soft_reset;
4226			WREG32(mmSRBM_SOFT_RESET, tmp);
4227			tmp = RREG32(mmSRBM_SOFT_RESET);
		/* un-stall the GMCON now that the reset pulse is done */
4230		if (grbm_soft_reset || srbm_soft_reset) {
4231			tmp = RREG32(mmGMCON_DEBUG);
4232			tmp = REG_SET_FIELD(tmp,
4233					    GMCON_DEBUG, GFX_STALL, 0);
4234			tmp = REG_SET_FIELD(tmp,
4235					    GMCON_DEBUG, GFX_CLEAR, 0);
4236			WREG32(mmGMCON_DEBUG, tmp);
4239		/* Wait a little for things to settle down */
4241		gfx_v8_0_print_status((void *)adev);
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
4254 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
	/* serialize capture: the write latches the counter into the LSB/MSB regs */
4258	mutex_lock(&adev->gfx.gpu_clock_mutex);
4259	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4260	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4261		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4262	mutex_unlock(&adev->gfx.gpu_clock_mutex);
/*
 * Emit four WRITE_DATA packets that program the per-VMID GDS layout:
 * GDS base and size, the GWS base/size pair (packed into one register),
 * and the OA allocation mask.  All values are converted from bytes to
 * hardware units via the AMDGPU_*_SHIFT constants first.
 */
4266 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4268					  uint32_t gds_base, uint32_t gds_size,
4269					  uint32_t gws_base, uint32_t gws_size,
4270					  uint32_t oa_base, uint32_t oa_size)
4272	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4273	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4275	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4276	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4278	oa_base = oa_base >> AMDGPU_OA_SHIFT;
4279	oa_size = oa_size >> AMDGPU_OA_SHIFT;
	/* GDS Base */
4282	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4283	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4284				WRITE_DATA_DST_SEL(0)));
4285	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4286	amdgpu_ring_write(ring, 0);
4287	amdgpu_ring_write(ring, gds_base);
	/* GDS Size */
4290	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4291	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4292				WRITE_DATA_DST_SEL(0)));
4293	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4294	amdgpu_ring_write(ring, 0);
4295	amdgpu_ring_write(ring, gds_size);
	/* GWS: size in the upper field, base in the lower */
4298	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4299	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4300				WRITE_DATA_DST_SEL(0)));
4301	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4302	amdgpu_ring_write(ring, 0);
4303	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
	/* OA: contiguous bit mask covering [oa_base, oa_base + oa_size) */
4306	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4307	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4308				WRITE_DATA_DST_SEL(0)));
4309	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4310	amdgpu_ring_write(ring, 0);
4311	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
/*
 * IP-block early_init callback: record the fixed ring counts and install
 * the GFX8 ring/IRQ/GDS function tables before any hardware is touched.
 */
4314 static int gfx_v8_0_early_init(void *handle)
4316	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4318	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
4319	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
4320	gfx_v8_0_set_ring_funcs(adev);
4321	gfx_v8_0_set_irq_funcs(adev);
4322	gfx_v8_0_set_gds_init(adev);
/* IP-block late_init callback. */
4327 static int gfx_v8_0_late_init(void *handle)
4329	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4332	/* requires IBs so do in late init after IB pool is initialized */
4333	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
/* Powergating control — no-op for GFX8 as far as this block shows
 * (body elided here; presumably just returns 0 — confirm in full file). */
4340 static int gfx_v8_0_set_powergating_state(void *handle,
4341					  enum amd_powergating_state state)
/*
 * Broadcast a BPM serdes command to every CU on Fiji: select all SEs/SHs,
 * open the CU and non-CU master masks fully, then rewrite
 * RLC_SERDES_WR_CTRL with the given BPM data (@cmd) and register address
 * (@reg_addr), clearing every other command/select field first.
 */
4346 static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
4347				 uint32_t reg_addr, uint32_t cmd)
	/* broadcast to all shader engines / shader arrays */
4351	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4353	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
4354	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
4356	data = RREG32(mmRLC_SERDES_WR_CTRL);
	/* wipe all previously latched command/select/address bits */
4357	data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
4358		  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
4359		  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
4360		  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
4361		  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
4362		  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
4363		  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
4364		  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
4365		  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
4366		  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
4367		  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* program the new command: BPM addr 0xff targets all BPMs */
4368	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
4369		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
4370		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
4371		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
4373	WREG32(mmRLC_SERDES_WR_CTRL, data);
/*
 * Toggle medium-grain clock gating (MGCG) plus memory light sleep (MGLS)
 * and tree-shade (CGTS) gating on Fiji.  The enable path numbers its
 * steps 1-7 in the comments below; the disable path (second half) undoes
 * them in roughly reverse order.  Each phase ends by waiting for the
 * serdes masters to idle before issuing the BPM serdes command.
 */
4376 static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4379	uint32_t temp, data;
4381	/* It is disabled by HW by default */
		/* === enable path === */
4383		/* 1 - RLC memory Light sleep */
4384		temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
4385		data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4387			WREG32(mmRLC_MEM_SLP_CNTL, data);
4389		/* 2 - CP memory Light sleep */
4390		temp = data = RREG32(mmCP_MEM_SLP_CNTL);
4391		data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4393			WREG32(mmCP_MEM_SLP_CNTL, data);
4395		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
4396		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4397		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4398			  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4399			  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4400			  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
4403			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4405		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4406		gfx_v8_0_wait_for_rlc_serdes(adev);
4408		/* 5 - clear mgcg override */
4409		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4411		/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
4412		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4413		data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
4414		data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
4415		data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
4416		data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
4417		data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
4418		data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
4419		data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
4421			WREG32(mmCGTS_SM_CTRL_REG, data);
4424		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4425		gfx_v8_0_wait_for_rlc_serdes(adev);
		/* === disable path === */
4427		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
4428		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4429		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4430			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4431			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4432			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
4434			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4436		/* 2 - disable MGLS in RLC */
4437		data = RREG32(mmRLC_MEM_SLP_CNTL);
4438		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4439			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4440			WREG32(mmRLC_MEM_SLP_CNTL, data);
4443		/* 3 - disable MGLS in CP */
4444		data = RREG32(mmCP_MEM_SLP_CNTL);
4445		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4446			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4447			WREG32(mmCP_MEM_SLP_CNTL, data);
4450		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
4451		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4452		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
4453				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
4455			WREG32(mmCGTS_SM_CTRL_REG, data);
4457		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4458		gfx_v8_0_wait_for_rlc_serdes(adev);
4460		/* 6 - set mgcg override */
4461		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4465		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4466		gfx_v8_0_wait_for_rlc_serdes(adev);
/*
 * Toggle coarse-grain clock gating (CGCG) and clock/light sleep (CGLS)
 * on Fiji.  Enable: lift the CGCG override, sync the serdes, send the
 * BPM commands, then set the CGCG/CGLS enable bits in RLC_CGCG_CGLS_CTRL.
 * Disable: re-assert the overrides, wake the gated clocks with dummy
 * reads, send the reverse BPM commands, and clear the enable bits.
 */
4470 static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4473	uint32_t temp, temp1, data, data1;
4475	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
		/* === enable path === */
4478		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
4479		 * Cmp_busy/GFX_Idle interrupts
4481		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4483		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4484		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
4486			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4488		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4489		gfx_v8_0_wait_for_rlc_serdes(adev);
4491		/* 3 - clear cgcg override */
4492		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4494		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4495		gfx_v8_0_wait_for_rlc_serdes(adev);
4497		/* 4 - write cmd to set CGLS */
4498		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
4500		/* 5 - enable cgcg */
4501		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4504			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4506			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4507			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
4510				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4513			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* === disable path === */
4515		/* disable cntx_empty_int_enable & GFX Idle interrupt */
4516		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
		/* re-assert CGCG/CGLS overrides */
4519		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4520		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
4521			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
4523			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4525		/* read gfx register to wake up cgcg */
4526		RREG32(mmCB_CGTT_SCLK_CTRL);
4527		RREG32(mmCB_CGTT_SCLK_CTRL);
4528		RREG32(mmCB_CGTT_SCLK_CTRL);
4529		RREG32(mmCB_CGTT_SCLK_CTRL);
4531		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4532		gfx_v8_0_wait_for_rlc_serdes(adev);
4534		/* write cmd to Set CGCG Override */
4535		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4537		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4538		gfx_v8_0_wait_for_rlc_serdes(adev);
4540		/* write cmd to Clear CGLS */
4541		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
4543		/* disable cgcg, cgls should be disabled too. */
4544		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4545			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4547			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
/*
 * fiji_update_gfx_clock_gating - toggle all Fiji GFX clock-gating
 * features in the hardware-required order.
 *
 * NOTE(review): upstream guards the two orderings with if (enable) /
 * else -- MGCG before CGCG on enable, CGCG before MGCG on disable.
 * The branch lines are not visible in this copy; confirm against the
 * original file.
 */
static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev,
	/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
	 * === MGCG + MGLS + TS(CG/LS) ===
	 */
	fiji_update_medium_grain_clock_gating(adev, enable);
	fiji_update_coarse_grain_clock_gating(adev, enable);

	/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
	 * === CGCG + CGLS ===
	 */
	fiji_update_coarse_grain_clock_gating(adev, enable);
	fiji_update_medium_grain_clock_gating(adev, enable);
/*
 * gfx_v8_0_set_clockgating_state - amd_ip_funcs clock-gating entry
 * point for the gfx8 IP block.
 *
 * NOTE(review): the switch scaffolding (case CHIP_FIJI:, default:,
 * return) is not visible in this copy -- only Fiji has a CG
 * implementation here; confirm against the original file.
 */
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
		/* gate when the requested state is AMD_CG_STATE_GATE */
		fiji_update_gfx_clock_gating(adev,
				state == AMD_CG_STATE_GATE ? true : false);
4585 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4589 rptr = ring->adev->wb.wb[ring->rptr_offs];
4594 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4596 struct amdgpu_device *adev = ring->adev;
4599 if (ring->use_doorbell)
4600 /* XXX check if swapping is necessary on BE */
4601 wptr = ring->adev->wb.wb[ring->wptr_offs];
4603 wptr = RREG32(mmCP_RB0_WPTR);
4608 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4610 struct amdgpu_device *adev = ring->adev;
4612 if (ring->use_doorbell) {
4613 /* XXX check if swapping is necessary on BE */
4614 adev->wb.wb[ring->wptr_offs] = ring->wptr;
4615 WDOORBELL32(ring->doorbell_index, ring->wptr);
4617 WREG32(mmCP_RB0_WPTR, ring->wptr);
4618 (void)RREG32(mmCP_RB0_WPTR);
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring.
 *
 * Emits a WAIT_REG_MEM packet that writes mmGPU_HDP_FLUSH_REQ and then
 * polls mmGPU_HDP_FLUSH_DONE until the per-client done bit
 * (ref_and_mask) is set, guaranteeing HDP writes have landed before
 * subsequent packets execute.
 *
 * NOTE(review): the per-pipe selection (upstream switches on ring->me /
 * ring->pipe for compute) is truncated in this copy -- confirm the
 * branch structure against the original file.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		/* compute: done bit depends on which MEC pipe this ring is on */
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
		/* gfx ring: CP0, waited on by the PFP engine */
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);  /* value written to REQ */
	amdgpu_ring_write(ring, ref_and_mask);  /* mask polled in DONE */
	amdgpu_ring_write(ring, 0x20); /* poll interval */
/*
 * gfx_v8_0_ring_emit_ib_gfx - schedule an indirect buffer on the gfx
 * ring: update next_rptr in the writeback buffer, optionally emit a
 * SWITCH_BUFFER on context switch, then emit the INDIRECT_BUFFER
 * (or INDIRECT_BUFFER_CONST for CE) packet.
 *
 * NOTE(review): several lines are missing from this copy (the early
 * `return` after the preamble-drop check, the next_rptr adjustment
 * under need_ctx_switch, and the BIG_ENDIAN variant of the IB address
 * write) -- confirm against the original file.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib)
	bool need_ctx_switch = ring->current_ctx != ib->ctx;
	u32 header, control = 0;
	/* next_rptr points just past this 5-dword WRITE_DATA packet */
	u32 next_rptr = ring->wptr + 5;

	/* drop the CE preamble IB for the same context */
	if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)

	if (need_ctx_switch)

	/* publish next_rptr via WRITE_DATA to memory (dst_sel 5) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (need_ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length plus the VMID (bits 24+) when a VM is attached */
	control |= ib->length_dw |
		   (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
/*
 * gfx_v8_0_ring_emit_ib_compute - schedule an indirect buffer on a
 * compute (MEC) ring: publish next_rptr, then emit the
 * INDIRECT_BUFFER packet with length and VMID packed into control.
 *
 * NOTE(review): the BIG_ENDIAN variant of the IB address write is not
 * visible in this copy -- confirm against the original file.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib)
	u32 header, control = 0;
	/* next_rptr points just past this 5-dword WRITE_DATA packet */
	u32 next_rptr = ring->wptr + 5;

	control |= INDIRECT_BUFFER_VALID;

	/* publish next_rptr via WRITE_DATA to memory (dst_sel 5) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length plus the VMID (bits 24+) when a VM is attached */
	control |= ib->length_dw |
		   (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring.
 *
 * Uses EVENT_WRITE_EOP to flush caches and write the 32/64-bit
 * sequence value to @addr, optionally raising an interrupt
 * (AMDGPU_FENCE_FLAG_INT).
 *
 * NOTE(review): additional EOP action flags (e.g. TC action enable,
 * event index) appear to be truncated from the second packet dword in
 * this copy -- confirm against the original file.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL: 2 = send 64bit data, 1 = 32bit; INT_SEL: 2 = int on write confirm */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
/**
 * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring
 *
 * @ring: amdgpu ring buffer object
 * @semaphore: amdgpu semaphore object
 * @emit_wait: Is this a semaphore wait?
 *
 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
 * from running ahead of semaphore waits.  Returns false on ASICs with the
 * VI hardware semaphore bug so the caller falls back to software fence
 * waits.
 */
static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
					 struct amdgpu_semaphore *semaphore,
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	if (ring->adev->asic_type == CHIP_TOPAZ ||
	    ring->adev->asic_type == CHIP_TONGA ||
	    ring->adev->asic_type == CHIP_FIJI)
		/* we got a hw semaphore bug in VI TONGA, return false to switch back to sw fence wait */

	amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 2));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, sel);

	if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
		/* Prevent the PFP from running ahead of the semaphore wait */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
/*
 * gfx_v8_0_ring_emit_vm_flush - flush the GPU TLB for a VM context.
 *
 * Waits for pending fences, updates the page-directory base address
 * for @vm_id, requests a VM invalidate and polls until it completes.
 *
 * NOTE(review): the `if (usepfp)` guards around the SWITCH_BUFFER /
 * PFP_SYNC_ME sections and the vm_id < 8 / >= 8 register-select branch
 * are not visible in this copy -- confirm against the original file.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait in memory until the last emitted fence has signalled */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3))); /* equal */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */

	/* sync CE with ME to prevent CE fetch CEIB before context switch done */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);

	/* update the page-directory base address for this vm_id:
	 * contexts 0-7 and 8-15 live in two separate register banks
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
	amdgpu_ring_write(ring,
			  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	amdgpu_ring_write(ring,
			  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	amdgpu_ring_write(ring, 0x0);
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
4857 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4859 return ring->adev->wb.wb[ring->rptr_offs];
4862 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4864 return ring->adev->wb.wb[ring->wptr_offs];
4867 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4869 struct amdgpu_device *adev = ring->adev;
4871 /* XXX check if swapping is necessary on BE */
4872 adev->wb.wb[ring->wptr_offs] = ring->wptr;
4873 WDOORBELL32(ring->doorbell_index, ring->wptr);
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring.
 *
 * Compute (MEC) rings use the RELEASE_MEM packet instead of
 * EVENT_WRITE_EOP to flush caches and write the sequence number,
 * optionally raising an interrupt (AMDGPU_FENCE_FLAG_INT).
 *
 * NOTE(review): the remaining signature parameters (addr, seq, flags)
 * are not visible in this copy -- confirm against the original file.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
	/* DATA_SEL: 2 = 64bit seq, 1 = 32bit; INT_SEL: 2 = int on write confirm */
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
/*
 * gfx_v8_0_set_gfx_eop_interrupt_state - enable/disable the gfx
 * ring's end-of-pipe (fence) interrupt via
 * CP_INT_CNTL_RING0.TIME_STAMP_INT_ENABLE.
 *
 * NOTE(review): the switch (state) scaffolding, break statements and
 * the `cp_int_cntl =` assignment on the ENABLE path are not visible in
 * this copy -- confirm against the original file.
 */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
	case AMDGPU_IRQ_STATE_DISABLE:
		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
					    TIME_STAMP_INT_ENABLE, 0);
		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
	case AMDGPU_IRQ_STATE_ENABLE:
		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
		REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
			      TIME_STAMP_INT_ENABLE, 1);
		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
/*
 * gfx_v8_0_set_compute_eop_interrupt_state - enable/disable the
 * end-of-pipe interrupt for one MEC pipe via its CP_MEx_PIPEy_INT_CNTL
 * register.
 *
 * NOTE(review): the switch (me)/switch (pipe) selection and the
 * switch (state) scaffolding are not visible in this copy -- confirm
 * against the original file.
 */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     enum amdgpu_interrupt_state state)
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
	 * handles the setting of interrupts for this specific pipe. All other
	 * pipes' interrupts are set by amdkfd.
	 */
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			DRM_DEBUG("invalid pipe %d\n", pipe);
		DRM_DEBUG("invalid me %d\n", me);

	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 0);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 1);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
4965 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4966 struct amdgpu_irq_src *source,
4968 enum amdgpu_interrupt_state state)
4973 case AMDGPU_IRQ_STATE_DISABLE:
4974 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4975 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4976 PRIV_REG_INT_ENABLE, 0);
4977 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4979 case AMDGPU_IRQ_STATE_ENABLE:
4980 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4981 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4982 PRIV_REG_INT_ENABLE, 0);
4983 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
/*
 * gfx_v8_0_set_priv_inst_fault_state - enable/disable the privileged
 * instruction fault interrupt via
 * CP_INT_CNTL_RING0.PRIV_INSTR_INT_ENABLE.
 *
 * NOTE(review): the `unsigned type` parameter, switch (state)
 * scaffolding, breaks and `return 0` are not visible in this copy --
 * confirm against the original file.
 */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      enum amdgpu_interrupt_state state)
	case AMDGPU_IRQ_STATE_DISABLE:
		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
					    PRIV_INSTR_INT_ENABLE, 0);
		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
	case AMDGPU_IRQ_STATE_ENABLE:
		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
					    PRIV_INSTR_INT_ENABLE, 1);
		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
/*
 * gfx_v8_0_set_eop_interrupt_state - irq_src .set callback: route an
 * EOP interrupt enable/disable request to the gfx ring or to the
 * matching MEC (me, pipe) pair.
 *
 * NOTE(review): the switch (type) scaffolding, break statements and
 * `return 0` are not visible in this copy -- confirm against the
 * original file.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    enum amdgpu_interrupt_state state)
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
/*
 * gfx_v8_0_eop_irq - EOP interrupt handler: decode me/pipe/queue from
 * the IV ring_id and kick fence processing on the matching ring.
 *
 * ring_id layout (from the IV entry): bits [3:2] = me, [1:0] = pipe,
 * [6:4] = queue.
 *
 * NOTE(review): the switch (me_id) scaffolding (me 0 -> gfx ring,
 * me 1/2 -> compute loop) and `return 0` are not visible in this
 * copy -- confirm against the original file.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

		/* me 0: the single gfx ring */
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);

		/* me 1/2: find the compute ring that matches the IV entry */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
5090 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
5091 struct amdgpu_irq_src *source,
5092 struct amdgpu_iv_entry *entry)
5094 DRM_ERROR("Illegal register access in command stream\n");
5095 schedule_work(&adev->reset_work);
5099 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
5100 struct amdgpu_irq_src *source,
5101 struct amdgpu_iv_entry *entry)
5103 DRM_ERROR("Illegal instruction in command stream\n");
5104 schedule_work(&adev->reset_work);
5108 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
5109 .early_init = gfx_v8_0_early_init,
5110 .late_init = gfx_v8_0_late_init,
5111 .sw_init = gfx_v8_0_sw_init,
5112 .sw_fini = gfx_v8_0_sw_fini,
5113 .hw_init = gfx_v8_0_hw_init,
5114 .hw_fini = gfx_v8_0_hw_fini,
5115 .suspend = gfx_v8_0_suspend,
5116 .resume = gfx_v8_0_resume,
5117 .is_idle = gfx_v8_0_is_idle,
5118 .wait_for_idle = gfx_v8_0_wait_for_idle,
5119 .soft_reset = gfx_v8_0_soft_reset,
5120 .print_status = gfx_v8_0_print_status,
5121 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
5122 .set_powergating_state = gfx_v8_0_set_powergating_state,
5125 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
5126 .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
5127 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
5128 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
5130 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
5131 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
5132 .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
5133 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5134 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5135 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5136 .test_ring = gfx_v8_0_ring_test_ring,
5137 .test_ib = gfx_v8_0_ring_test_ib,
5138 .insert_nop = amdgpu_ring_insert_nop,
5141 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
5142 .get_rptr = gfx_v8_0_ring_get_rptr_compute,
5143 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
5144 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
5146 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
5147 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
5148 .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
5149 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5150 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5151 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5152 .test_ring = gfx_v8_0_ring_test_ring,
5153 .test_ib = gfx_v8_0_ring_test_ib,
5154 .insert_nop = amdgpu_ring_insert_nop,
5157 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5161 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5162 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5164 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5165 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5168 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
5169 .set = gfx_v8_0_set_eop_interrupt_state,
5170 .process = gfx_v8_0_eop_irq,
5173 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
5174 .set = gfx_v8_0_set_priv_reg_fault_state,
5175 .process = gfx_v8_0_priv_reg_irq,
5178 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
5179 .set = gfx_v8_0_set_priv_inst_fault_state,
5180 .process = gfx_v8_0_priv_inst_irq,
5183 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
5185 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5186 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
5188 adev->gfx.priv_reg_irq.num_types = 1;
5189 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
5191 adev->gfx.priv_inst_irq.num_types = 1;
5192 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
/*
 * gfx_v8_0_set_gds_init - initialize GDS/GWS/OA pool totals and the
 * per-client (gfx vs. CS) partition sizes, keyed off the total GDS
 * memory size reported by the hardware.
 *
 * NOTE(review): the `} else {` separating the 64 KiB case from the
 * larger-GDS case is not visible in this copy -- confirm against the
 * original file.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
	/* init ASIC gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		/* 64 KiB GDS: small partitions */
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
		/* larger GDS: bigger partitions */
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
/*
 * gfx_v8_0_get_cu_active_bitmap - return the bitmap of active CUs for
 * the given shader engine / shader array.
 *
 * The CC/GC_USER SHADER_ARRAY_CONFIG registers hold *inactive* CU
 * bits, hence the final inversion against a mask of max_cu_per_sh
 * bits.
 *
 * NOTE(review): the parameter list tail (se, sh), the loop index
 * declaration and the loop body that builds `mask` (and folds tmp1
 * into tmp) are not visible in this copy -- confirm against the
 * original file.
 */
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev,
	u32 mask = 0, tmp, tmp1;

	/* select the SE/SH window, read the harvest registers, then
	 * restore broadcast mode
	 */
	gfx_v8_0_select_se_sh(adev, se, sh);
	tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	/* build a mask covering max_cu_per_sh CU bits */
	for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {

	/* inactive bits are set in tmp; invert to get active CUs */
	return (~tmp) & mask;
5247 int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5248 struct amdgpu_cu_info *cu_info)
5250 int i, j, k, counter, active_cu_number = 0;
5251 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5253 if (!adev || !cu_info)
5256 mutex_lock(&adev->grbm_idx_mutex);
5257 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5258 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5262 bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j);
5263 cu_info->bitmap[i][j] = bitmap;
5265 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
5266 if (bitmap & mask) {
5273 active_cu_number += counter;
5274 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5278 cu_info->number = active_cu_number;
5279 cu_info->ao_cu_mask = ao_cu_mask;
5280 mutex_unlock(&adev->grbm_idx_mutex);