/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
27 #include "amdgpu_gfx.h"
29 #include "vi_structs.h"
31 #include "amdgpu_ucode.h"
32 #include "amdgpu_atombios.h"
33 #include "atombios_i2c.h"
34 #include "clearstate_vi.h"
36 #include "gmc/gmc_8_2_d.h"
37 #include "gmc/gmc_8_2_sh_mask.h"
39 #include "oss/oss_3_0_d.h"
40 #include "oss/oss_3_0_sh_mask.h"
42 #include "bif/bif_5_0_d.h"
43 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
52 #include "smu/smu_7_1_3_d.h"
/* Number of GFX rings and the per-queue MEC hardware-queue-descriptor size. */
#define GFX8_NUM_GFX_RINGS 1
#define GFX8_MEC_HPD_SIZE 2048

/* Golden (recommended power-on) GB_ADDR_CONFIG values per ASIC family. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
/*
 * Field packers for building GB_TILE_MODEn and GB_MACROTILE_MODEn register
 * values: each shifts a field value into its register position using the
 * shift constants from the gfx_8_0 register headers.
 */
#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
/* Local bit masks for the RLC_CGTT_MGCG_OVERRIDE register fields. */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L

/* Set/clear commands for the BPM SERDES interface. */
#define SET_BPM_SERDES_CMD 1
#define CLE_BPM_SERDES_CMD 0
/* BPM register addresses */
enum bpm_reg {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	/* NOTE(review): enum header/terminator lines were missing in the
	 * extracted source and were reconstructed -- confirm against upstream.
	 */
	BPM_REG_FGCG_MAX
};
/* Number of entries in the RLC direct-register-list format. */
#define RLC_FormatDirectRegListLength 14
95 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
128 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
129 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
130 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
131 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
141 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
143 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
145 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
152 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
153 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
155 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
157 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
164 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
165 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
166 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
167 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
168 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
169 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
171 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
173 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
174 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
175 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
176 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
177 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
178 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
179 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
180 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
181 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
182 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
183 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
184 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
185 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
186 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
187 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
188 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
191 static const u32 golden_settings_tonga_a11[] =
193 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
194 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
195 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
196 mmGB_GPU_ID, 0x0000000f, 0x00000000,
197 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
198 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
199 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
200 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
201 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
202 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
203 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
204 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
205 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
206 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
207 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
208 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
211 static const u32 tonga_golden_common_all[] =
213 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
214 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
215 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
216 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
217 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
218 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
219 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
220 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
223 static const u32 tonga_mgcg_cgcg_init[] =
225 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
226 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
227 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
228 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
229 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
230 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
231 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
232 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
233 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
234 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
235 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
236 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
237 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
238 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
239 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
240 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
241 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
242 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
243 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
244 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
245 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
246 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
247 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
248 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
249 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
250 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
251 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
252 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
253 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
254 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
255 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
256 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
257 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
258 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
259 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
260 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
261 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
262 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
263 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
264 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
265 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
266 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
267 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
268 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
269 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
270 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
271 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
272 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
273 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
274 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
275 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
276 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
277 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
278 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
279 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
280 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
281 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
282 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
283 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
284 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
285 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
286 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
287 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
288 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
289 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
290 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
291 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
292 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
293 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
294 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
295 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
296 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
297 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
298 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
299 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
302 static const u32 golden_settings_vegam_a11[] =
304 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
305 mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
306 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
307 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
308 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
309 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
310 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
311 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
312 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
313 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
314 mmSQ_CONFIG, 0x07f80000, 0x01180000,
315 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
316 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
317 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
318 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
319 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
320 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
323 static const u32 vegam_golden_common_all[] =
325 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
326 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
327 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
328 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
329 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
330 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
333 static const u32 golden_settings_polaris11_a11[] =
335 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
336 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
337 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
338 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
339 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
340 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
341 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
342 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
343 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
344 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
345 mmSQ_CONFIG, 0x07f80000, 0x01180000,
346 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
347 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
348 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
349 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
350 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
351 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
354 static const u32 polaris11_golden_common_all[] =
356 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
357 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
358 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
359 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
360 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
361 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
364 static const u32 golden_settings_polaris10_a11[] =
366 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
367 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
368 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
369 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
370 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
371 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
372 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
373 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
374 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
375 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
376 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
377 mmSQ_CONFIG, 0x07f80000, 0x07180000,
378 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
379 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
380 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
381 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
382 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
385 static const u32 polaris10_golden_common_all[] =
387 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
388 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
389 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
390 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
391 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
392 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
393 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
394 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
397 static const u32 fiji_golden_common_all[] =
399 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
400 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
401 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
402 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
403 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
404 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
405 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
406 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
407 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
408 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
411 static const u32 golden_settings_fiji_a10[] =
413 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
414 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
415 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
416 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
417 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
418 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
419 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
420 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
421 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
422 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
423 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
426 static const u32 fiji_mgcg_cgcg_init[] =
428 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
429 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
430 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
431 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
432 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
433 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
434 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
435 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
436 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
437 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
438 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
439 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
440 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
441 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
442 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
443 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
444 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
445 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
446 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
447 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
448 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
449 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
450 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
451 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
452 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
453 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
454 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
455 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
456 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
457 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
458 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
459 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
460 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
461 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
462 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
465 static const u32 golden_settings_iceland_a11[] =
467 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
468 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
469 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
470 mmGB_GPU_ID, 0x0000000f, 0x00000000,
471 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
472 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
473 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
474 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
475 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
476 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
477 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
478 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
479 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
480 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
481 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
482 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
485 static const u32 iceland_golden_common_all[] =
487 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
488 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
489 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
490 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
491 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
492 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
493 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
494 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
497 static const u32 iceland_mgcg_cgcg_init[] =
499 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
500 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
501 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
502 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
503 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
504 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
505 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
506 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
507 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
508 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
509 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
510 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
511 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
512 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
513 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
514 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
515 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
516 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
517 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
518 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
519 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
520 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
521 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
522 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
523 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
524 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
525 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
526 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
527 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
528 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
529 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
530 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
531 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
532 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
533 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
534 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
535 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
536 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
537 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
538 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
539 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
540 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
541 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
542 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
543 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
544 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
545 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
546 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
547 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
548 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
549 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
550 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
551 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
552 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
553 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
554 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
555 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
558 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
561 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
562 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
565 static const u32 cz_golden_settings_a11[] =
567 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
568 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
569 mmGB_GPU_ID, 0x0000000f, 0x00000000,
570 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
571 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
572 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
573 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
574 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
575 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
576 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
577 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
578 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
581 static const u32 cz_golden_common_all[] =
583 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
584 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
585 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
586 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
587 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
588 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
589 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
590 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
593 static const u32 cz_mgcg_cgcg_init[] =
595 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
596 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
597 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
598 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
599 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
600 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
601 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
602 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
603 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
604 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
605 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
606 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
607 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
608 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
609 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
610 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
611 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
612 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
613 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
614 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
615 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
616 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
617 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
618 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
619 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
620 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
621 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
622 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
623 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
624 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
625 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
626 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
627 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
628 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
629 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
630 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
631 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
632 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
633 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
634 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
635 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
636 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
637 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
638 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
639 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
640 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
641 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
642 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
643 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
644 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
645 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
646 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
647 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
648 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
649 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
650 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
651 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
652 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
653 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
654 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
655 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
656 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
657 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
658 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
659 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
660 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
661 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
662 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
663 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
664 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
665 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
666 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
667 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
668 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
669 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
672 static const u32 stoney_golden_settings_a11[] =
674 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
675 mmGB_GPU_ID, 0x0000000f, 0x00000000,
676 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
677 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
678 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
679 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
680 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
681 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
682 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
683 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
686 static const u32 stoney_golden_common_all[] =
688 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
689 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
690 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
691 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
692 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
693 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
694 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
695 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
698 static const u32 stoney_mgcg_cgcg_init[] =
700 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
701 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
702 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
703 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
704 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
/* Human-readable descriptions of SQ EDC error sources, indexed by the
 * SQ_EDC_INFO source id reported by hardware.
 */
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
718 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
719 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
720 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
721 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
722 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
723 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
724 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
725 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
/*
 * gfx_v8_0_init_golden_registers - apply per-ASIC "golden" register
 * settings (clock-gating init values, tuning settings, common config)
 * at hardware init.  Dispatches on adev->asic_type and programs the
 * matching register triples via amdgpu_device_program_register_sequence().
 * NOTE(review): this listing is non-contiguous — the switch's case
 * labels, break statements and closing braces are not visible; the ASIC
 * each section targets is inferred from the array names.
 */
727 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
729 switch (adev->asic_type) {
/* Iceland (Topaz) sequences */
731 amdgpu_device_program_register_sequence(adev,
732 iceland_mgcg_cgcg_init,
733 ARRAY_SIZE(iceland_mgcg_cgcg_init));
734 amdgpu_device_program_register_sequence(adev,
735 golden_settings_iceland_a11,
736 ARRAY_SIZE(golden_settings_iceland_a11));
737 amdgpu_device_program_register_sequence(adev,
738 iceland_golden_common_all,
739 ARRAY_SIZE(iceland_golden_common_all));
/* Fiji sequences */
742 amdgpu_device_program_register_sequence(adev,
744 ARRAY_SIZE(fiji_mgcg_cgcg_init));
745 amdgpu_device_program_register_sequence(adev,
746 golden_settings_fiji_a10,
747 ARRAY_SIZE(golden_settings_fiji_a10));
748 amdgpu_device_program_register_sequence(adev,
749 fiji_golden_common_all,
750 ARRAY_SIZE(fiji_golden_common_all));
/* Tonga sequences */
754 amdgpu_device_program_register_sequence(adev,
755 tonga_mgcg_cgcg_init,
756 ARRAY_SIZE(tonga_mgcg_cgcg_init));
757 amdgpu_device_program_register_sequence(adev,
758 golden_settings_tonga_a11,
759 ARRAY_SIZE(golden_settings_tonga_a11));
760 amdgpu_device_program_register_sequence(adev,
761 tonga_golden_common_all,
762 ARRAY_SIZE(tonga_golden_common_all));
/* VegaM sequences */
765 amdgpu_device_program_register_sequence(adev,
766 golden_settings_vegam_a11,
767 ARRAY_SIZE(golden_settings_vegam_a11));
768 amdgpu_device_program_register_sequence(adev,
769 vegam_golden_common_all,
770 ARRAY_SIZE(vegam_golden_common_all));
/* Polaris11 sequences */
774 amdgpu_device_program_register_sequence(adev,
775 golden_settings_polaris11_a11,
776 ARRAY_SIZE(golden_settings_polaris11_a11));
777 amdgpu_device_program_register_sequence(adev,
778 polaris11_golden_common_all,
779 ARRAY_SIZE(polaris11_golden_common_all));
/* Polaris10 sequences */
782 amdgpu_device_program_register_sequence(adev,
783 golden_settings_polaris10_a11,
784 ARRAY_SIZE(golden_settings_polaris10_a11));
785 amdgpu_device_program_register_sequence(adev,
786 polaris10_golden_common_all,
787 ARRAY_SIZE(polaris10_golden_common_all));
788 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
/*
 * Board-specific quirk: on revision 0xc7 parts from these three
 * partner boards (matched by PCI subsystem vendor/device IDs) an
 * extra programming sequence is pushed over the ATOM BIOS I2C
 * channel.  The meaning of the raw bytes (0x10/0x96/0x1E/0xDD and
 * 0x10/0x96/0x1F/0xD0) is not derivable from this file —
 * presumably a voltage-regulator fixup; confirm against git history.
 */
789 if (adev->pdev->revision == 0xc7 &&
790 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
791 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
792 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
793 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
794 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
/* Carrizo (cz) sequences */
798 amdgpu_device_program_register_sequence(adev,
800 ARRAY_SIZE(cz_mgcg_cgcg_init));
801 amdgpu_device_program_register_sequence(adev,
802 cz_golden_settings_a11,
803 ARRAY_SIZE(cz_golden_settings_a11));
804 amdgpu_device_program_register_sequence(adev,
805 cz_golden_common_all,
806 ARRAY_SIZE(cz_golden_common_all));
/* Stoney sequences */
809 amdgpu_device_program_register_sequence(adev,
810 stoney_mgcg_cgcg_init,
811 ARRAY_SIZE(stoney_mgcg_cgcg_init));
812 amdgpu_device_program_register_sequence(adev,
813 stoney_golden_settings_a11,
814 ARRAY_SIZE(stoney_golden_settings_a11));
815 amdgpu_device_program_register_sequence(adev,
816 stoney_golden_common_all,
817 ARRAY_SIZE(stoney_golden_common_all));
/*
 * gfx_v8_0_scratch_init - describe the CP scratch register file:
 * 8 registers starting at mmSCRATCH_REG0, all initially free (one
 * bit per register in free_mask).  Consumed by amdgpu_gfx_scratch_get()
 * / amdgpu_gfx_scratch_free() during ring tests.
 */
824 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
826 adev->gfx.scratch.num_reg = 8;
827 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
828 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
/*
 * gfx_v8_0_ring_test_ring - minimal CP ring liveness test.
 * Seeds a scratch register with 0xCAFEDEAD, submits a 3-dword
 * SET_UCONFIG_REG packet asking the CP to overwrite it with 0xDEADBEEF,
 * then polls the register (bounded by adev->usec_timeout) until the new
 * value appears.  The scratch register is released on all paths.
 * NOTE(review): several error-path/return lines are missing from this
 * non-contiguous listing.
 */
831 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
833 struct amdgpu_device *adev = ring->adev;
839 r = amdgpu_gfx_scratch_get(adev, &scratch);
841 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
844 WREG32(scratch, 0xCAFEDEAD); /* sentinel the CP must replace */
845 r = amdgpu_ring_alloc(ring, 3);
847 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
849 amdgpu_gfx_scratch_free(adev, scratch);
852 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
853 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
854 amdgpu_ring_write(ring, 0xDEADBEEF);
855 amdgpu_ring_commit(ring);
/* Busy-poll for the CP write to land. */
857 for (i = 0; i < adev->usec_timeout; i++) {
858 tmp = RREG32(scratch);
859 if (tmp == 0xDEADBEEF)
863 if (i < adev->usec_timeout) {
864 DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
867 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
868 ring->idx, scratch, tmp);
871 amdgpu_gfx_scratch_free(adev, scratch);
/*
 * gfx_v8_0_ring_test_ib - indirect-buffer (IB) execution test.
 * Allocates a writeback (WB) slot seeded with 0xCAFEDEAD, builds a
 * small IB containing a WRITE_DATA packet that stores 0xDEADBEEF to the
 * WB slot's GPU address, schedules it on the ring, waits on its fence
 * (bounded by @timeout), then checks the slot for the expected value.
 * NOTE(review): error-path/cleanup lines (gotos, returns) are missing
 * from this non-contiguous listing; the visible cleanup frees the IB
 * and the WB slot.
 */
875 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
877 struct amdgpu_device *adev = ring->adev;
879 struct dma_fence *f = NULL;
886 r = amdgpu_device_wb_get(adev, &index);
888 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
892 gpu_addr = adev->wb.gpu_addr + (index * 4); /* WB slots are 4 bytes */
893 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
894 memset(&ib, 0, sizeof(ib));
895 r = amdgpu_ib_get(adev, NULL, 16, &ib);
897 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
/* WRITE_DATA: dst_sel=5 (memory), write-confirm, 64-bit address. */
900 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
901 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
902 ib.ptr[2] = lower_32_bits(gpu_addr);
903 ib.ptr[3] = upper_32_bits(gpu_addr);
904 ib.ptr[4] = 0xDEADBEEF;
907 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
911 r = dma_fence_wait_timeout(f, false, timeout);
913 DRM_ERROR("amdgpu: IB test timed out.\n");
917 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
921 tmp = adev->wb.wb[index];
922 if (tmp == 0xDEADBEEF) {
923 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
926 DRM_ERROR("ib test on ring %d failed\n", ring->idx);
931 amdgpu_ib_free(adev, &ib, NULL);
934 amdgpu_device_wb_free(adev, index);
/*
 * gfx_v8_0_free_microcode - release every GFX firmware blob requested
 * by gfx_v8_0_init_microcode() and the RLC register-list scratch
 * allocation, NULLing the pointers afterwards.
 * Stoney and Topaz have no MEC2 firmware, so its release_firmware()
 * call is skipped there (release_firmware(NULL) would also be a no-op;
 * the pointer is still cleared unconditionally).
 */
939 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
941 release_firmware(adev->gfx.pfp_fw);
942 adev->gfx.pfp_fw = NULL;
943 release_firmware(adev->gfx.me_fw);
944 adev->gfx.me_fw = NULL;
945 release_firmware(adev->gfx.ce_fw);
946 adev->gfx.ce_fw = NULL;
947 release_firmware(adev->gfx.rlc_fw);
948 adev->gfx.rlc_fw = NULL;
949 release_firmware(adev->gfx.mec_fw);
950 adev->gfx.mec_fw = NULL;
951 if ((adev->asic_type != CHIP_STONEY) &&
952 (adev->asic_type != CHIP_TOPAZ))
953 release_firmware(adev->gfx.mec2_fw);
954 adev->gfx.mec2_fw = NULL;
/* Buffer allocated in gfx_v8_0_init_microcode() for the RLC lists. */
956 kfree(adev->gfx.rlc.register_list_format);
/*
 * gfx_v8_0_init_microcode - request and validate all GFX firmware blobs
 * (PFP, ME, CE, RLC, MEC1, optional MEC2) for the detected ASIC, parse
 * their headers into adev->gfx.*, and — when the SMU loads firmware —
 * register each blob in adev->firmware.ucode[] and account its size.
 *
 * Polaris10/11/12 parts first try the newer "*_2.bin" firmware name and
 * fall back to the legacy "*.bin" name on -ENOENT.
 * Stoney and Topaz have no MEC2 firmware at all.
 *
 * On any failure, the visible error path releases every blob obtained
 * so far and clears the pointers.
 * NOTE(review): this listing is non-contiguous — goto labels, several
 * "if (err)" guards, case labels and returns are not visible here.
 */
959 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
961 const char *chip_name;
964 struct amdgpu_firmware_info *info = NULL;
965 const struct common_firmware_header *header = NULL;
966 const struct gfx_firmware_header_v1_0 *cp_hdr;
967 const struct rlc_firmware_header_v2_0 *rlc_hdr;
968 unsigned int *tmp = NULL, i;
/* Map the ASIC type to the firmware file name prefix. */
972 switch (adev->asic_type) {
980 chip_name = "carrizo";
986 chip_name = "stoney";
989 chip_name = "polaris10";
992 chip_name = "polaris11";
995 chip_name = "polaris12";
/* PFP: prefer the "_2" firmware on Polaris, fall back to legacy. */
1004 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1005 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
1006 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1007 if (err == -ENOENT) {
1008 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1009 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1012 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1013 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1017 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1020 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1021 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1022 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* ME: same "_2"-then-legacy naming scheme. */
1024 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1025 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1026 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1027 if (err == -ENOENT) {
1028 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1029 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1032 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1033 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1037 err = amdgpu_ucode_validate(adev->gfx.me_fw);
1040 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1041 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1043 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* CE: same "_2"-then-legacy naming scheme. */
1045 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1046 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1047 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1048 if (err == -ENOENT) {
1049 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1050 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1053 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1054 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1058 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1061 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1062 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1063 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1066 * Support for MCBP/Virtualization in combination with chained IBs is
1067 * formal released on feature version #46
1069 if (adev->gfx.ce_feature_version >= 46 &&
1070 adev->gfx.pfp_feature_version >= 46) {
1071 adev->virt.chained_ib_support = true;
1072 DRM_INFO("Chained IB support enabled!\n");
1074 adev->virt.chained_ib_support = false;
/* RLC firmware: single name, no "_2" variant. */
1076 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1077 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1080 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1081 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1082 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1083 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
/* Copy the RLC v2.0 header offsets/sizes into driver state. */
1085 adev->gfx.rlc.save_and_restore_offset =
1086 le32_to_cpu(rlc_hdr->save_and_restore_offset);
1087 adev->gfx.rlc.clear_state_descriptor_offset =
1088 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1089 adev->gfx.rlc.avail_scratch_ram_locations =
1090 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1091 adev->gfx.rlc.reg_restore_list_size =
1092 le32_to_cpu(rlc_hdr->reg_restore_list_size);
1093 adev->gfx.rlc.reg_list_format_start =
1094 le32_to_cpu(rlc_hdr->reg_list_format_start);
1095 adev->gfx.rlc.reg_list_format_separate_start =
1096 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1097 adev->gfx.rlc.starting_offsets_start =
1098 le32_to_cpu(rlc_hdr->starting_offsets_start);
1099 adev->gfx.rlc.reg_list_format_size_bytes =
1100 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1101 adev->gfx.rlc.reg_list_size_bytes =
1102 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
/*
 * One allocation holds both lists: register_list_format first,
 * then register_restore directly after it (freed in
 * gfx_v8_0_free_microcode()).
 */
1104 adev->gfx.rlc.register_list_format =
1105 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1106 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1108 if (!adev->gfx.rlc.register_list_format) {
/* Byte-swap the format list out of the firmware image. */
1113 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1114 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1115 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1116 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1118 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
/* Byte-swap the restore list out of the firmware image. */
1120 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1121 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1122 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1123 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
/* MEC1: same "_2"-then-legacy naming scheme. */
1125 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1126 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1127 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1128 if (err == -ENOENT) {
1129 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1130 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1133 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1134 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1138 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1141 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1142 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1143 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* MEC2: optional; Stoney/Topaz do not ship it. */
1145 if ((adev->asic_type != CHIP_STONEY) &&
1146 (adev->asic_type != CHIP_TOPAZ)) {
1147 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1148 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1149 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1150 if (err == -ENOENT) {
1151 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1152 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1155 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1156 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1159 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1162 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1163 adev->gfx.mec2_fw->data;
1164 adev->gfx.mec2_fw_version =
1165 le32_to_cpu(cp_hdr->header.ucode_version);
1166 adev->gfx.mec2_feature_version =
1167 le32_to_cpu(cp_hdr->ucode_feature_version);
1170 adev->gfx.mec2_fw = NULL;
/*
 * SMU-loaded firmware: register each blob in the ucode table and
 * accumulate the page-aligned total footprint in firmware.fw_size.
 */
1174 if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1175 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1176 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1177 info->fw = adev->gfx.pfp_fw;
1178 header = (const struct common_firmware_header *)info->fw->data;
1179 adev->firmware.fw_size +=
1180 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1182 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1183 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1184 info->fw = adev->gfx.me_fw;
1185 header = (const struct common_firmware_header *)info->fw->data;
1186 adev->firmware.fw_size +=
1187 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1189 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1190 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1191 info->fw = adev->gfx.ce_fw;
1192 header = (const struct common_firmware_header *)info->fw->data;
1193 adev->firmware.fw_size +=
1194 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1196 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1197 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1198 info->fw = adev->gfx.rlc_fw;
1199 header = (const struct common_firmware_header *)info->fw->data;
1200 adev->firmware.fw_size +=
1201 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1203 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1204 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1205 info->fw = adev->gfx.mec_fw;
1206 header = (const struct common_firmware_header *)info->fw->data;
1207 adev->firmware.fw_size +=
1208 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1210 /* we need account JT in */
1211 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1212 adev->firmware.fw_size +=
1213 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
/* Under SR-IOV, reserve an extra storage region as well. */
1215 if (amdgpu_sriov_vf(adev)) {
1216 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1217 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1218 info->fw = adev->gfx.mec_fw;
1219 adev->firmware.fw_size +=
1220 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1223 if (adev->gfx.mec2_fw) {
1224 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1225 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1226 info->fw = adev->gfx.mec2_fw;
1227 header = (const struct common_firmware_header *)info->fw->data;
1228 adev->firmware.fw_size +=
1229 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
/* Error path: drop every firmware reference obtained above. */
1237 "gfx8: Failed to load firmware \"%s\"\n",
1239 release_firmware(adev->gfx.pfp_fw);
1240 adev->gfx.pfp_fw = NULL;
1241 release_firmware(adev->gfx.me_fw);
1242 adev->gfx.me_fw = NULL;
1243 release_firmware(adev->gfx.ce_fw);
1244 adev->gfx.ce_fw = NULL;
1245 release_firmware(adev->gfx.rlc_fw);
1246 adev->gfx.rlc_fw = NULL;
1247 release_firmware(adev->gfx.mec_fw);
1248 adev->gfx.mec_fw = NULL;
1249 release_firmware(adev->gfx.mec2_fw);
1250 adev->gfx.mec2_fw = NULL;
/*
 * gfx_v8_0_get_csb_buffer - fill @buffer (the RLC clear-state BO, sized
 * by gfx_v8_0_get_csb_size()) with the PM4 clear-state packet stream:
 * PREAMBLE begin, CONTEXT_CONTROL, every SECT_CONTEXT extent from
 * adev->gfx.rlc.cs_data, the raster config pair, PREAMBLE end and a
 * final CLEAR_STATE packet.  All dwords are stored little-endian.
 * NOTE(review): some guard/brace lines are missing from this
 * non-contiguous listing.
 */
1255 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1256 volatile u32 *buffer)
1259 const struct cs_section_def *sect = NULL;
1260 const struct cs_extent_def *ext = NULL;
1262 if (adev->gfx.rlc.cs_data == NULL)
1267 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1268 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1270 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1271 buffer[count++] = cpu_to_le32(0x80000000);
1272 buffer[count++] = cpu_to_le32(0x80000000);
/* Emit a SET_CONTEXT_REG packet per extent of each context section. */
1274 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1275 for (ext = sect->section; ext->extent != NULL; ++ext) {
1276 if (sect->id == SECT_CONTEXT) {
1278 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1279 buffer[count++] = cpu_to_le32(ext->reg_index -
1280 PACKET3_SET_CONTEXT_REG_START);
1281 for (i = 0; i < ext->reg_count; i++)
1282 buffer[count++] = cpu_to_le32(ext->extent[i]);
/* PA_SC_RASTER_CONFIG + PA_SC_RASTER_CONFIG_1 from SE0/SH0 config. */
1289 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1290 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1291 PACKET3_SET_CONTEXT_REG_START);
1292 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1293 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1295 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1296 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1298 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1299 buffer[count++] = cpu_to_le32(0);
/*
 * cz_init_cp_jump_table - copy the CP jump tables (JT) of each micro
 * engine firmware image (me index: 0=CE, 1=PFP, 2=ME, 3=MEC1, 4=MEC2)
 * into the RLC cp_table BO at increasing bo_offset.  JT offset/size are
 * read from each blob's gfx v1.0 firmware header.
 * NOTE(review): the branch that adjusts max_me for CHIP_CARRIZO and
 * some brace lines are missing from this non-contiguous listing, so
 * whether me==4 (MEC2) is ever reached on Carrizo cannot be confirmed
 * from here.
 */
1302 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1304 const __le32 *fw_data;
1305 volatile u32 *dst_ptr;
1306 int me, i, max_me = 4;
1308 u32 table_offset, table_size;
1310 if (adev->asic_type == CHIP_CARRIZO)
1313 /* write the cp table buffer */
1314 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1315 for (me = 0; me < max_me; me++) {
/* me == 0: constant engine (CE) jump table */
1317 const struct gfx_firmware_header_v1_0 *hdr =
1318 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1319 fw_data = (const __le32 *)
1320 (adev->gfx.ce_fw->data +
1321 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1322 table_offset = le32_to_cpu(hdr->jt_offset);
1323 table_size = le32_to_cpu(hdr->jt_size);
1324 } else if (me == 1) {
/* prefetch parser (PFP) jump table */
1325 const struct gfx_firmware_header_v1_0 *hdr =
1326 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1327 fw_data = (const __le32 *)
1328 (adev->gfx.pfp_fw->data +
1329 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1330 table_offset = le32_to_cpu(hdr->jt_offset);
1331 table_size = le32_to_cpu(hdr->jt_size);
1332 } else if (me == 2) {
/* micro engine (ME) jump table */
1333 const struct gfx_firmware_header_v1_0 *hdr =
1334 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1335 fw_data = (const __le32 *)
1336 (adev->gfx.me_fw->data +
1337 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1338 table_offset = le32_to_cpu(hdr->jt_offset);
1339 table_size = le32_to_cpu(hdr->jt_size);
1340 } else if (me == 3) {
/* compute micro engine 1 (MEC1) jump table */
1341 const struct gfx_firmware_header_v1_0 *hdr =
1342 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1343 fw_data = (const __le32 *)
1344 (adev->gfx.mec_fw->data +
1345 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1346 table_offset = le32_to_cpu(hdr->jt_offset);
1347 table_size = le32_to_cpu(hdr->jt_size);
1348 } else if (me == 4) {
/* compute micro engine 2 (MEC2) jump table */
1349 const struct gfx_firmware_header_v1_0 *hdr =
1350 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1351 fw_data = (const __le32 *)
1352 (adev->gfx.mec2_fw->data +
1353 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1354 table_offset = le32_to_cpu(hdr->jt_offset);
1355 table_size = le32_to_cpu(hdr->jt_size);
/* Copy this engine's JT dwords into the shared cp_table BO. */
1358 for (i = 0; i < table_size; i ++) {
1359 dst_ptr[bo_offset + i] =
1360 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1363 bo_offset += table_size;
/*
 * gfx_v8_0_rlc_fini - free the RLC buffer objects created by
 * gfx_v8_0_rlc_init(): the clear-state BO and the CP jump-table BO.
 * amdgpu_bo_free_kernel() tolerates NULL, so this is safe to call on a
 * partially initialized RLC.
 */
1367 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1369 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1370 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
/*
 * gfx_v8_0_rlc_init - set up RLC buffer objects:
 *  1) a VRAM clear-state BO sized by gfx_v8_0_get_csb_size(), filled
 *     via gfx_v8_0_get_csb_buffer() and then unmapped/unreserved;
 *  2) on Carrizo/Stoney only, a CP jump-table BO (JT area aligned to
 *     2048 plus 64 KiB for GDS backup) filled by cz_init_cp_jump_table().
 * On clear-state BO creation failure, tears down via gfx_v8_0_rlc_fini().
 * NOTE(review): several "if (r)"/return lines are missing from this
 * non-contiguous listing.
 */
1373 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1375 volatile u32 *dst_ptr;
1377 const struct cs_section_def *cs_data;
1380 adev->gfx.rlc.cs_data = vi_cs_data;
1382 cs_data = adev->gfx.rlc.cs_data;
1385 /* clear state block */
1386 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1388 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1389 AMDGPU_GEM_DOMAIN_VRAM,
1390 &adev->gfx.rlc.clear_state_obj,
1391 &adev->gfx.rlc.clear_state_gpu_addr,
1392 (void **)&adev->gfx.rlc.cs_ptr);
1394 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1395 gfx_v8_0_rlc_fini(adev);
1399 /* set up the cs buffer */
1400 dst_ptr = adev->gfx.rlc.cs_ptr;
1401 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1402 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1403 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1406 if ((adev->asic_type == CHIP_CARRIZO) ||
1407 (adev->asic_type == CHIP_STONEY)) {
1408 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1409 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1410 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1411 &adev->gfx.rlc.cp_table_obj,
1412 &adev->gfx.rlc.cp_table_gpu_addr,
1413 (void **)&adev->gfx.rlc.cp_table_ptr);
1415 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1419 cz_init_cp_jump_table(adev);
1421 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1422 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
/*
 * gfx_v8_0_mec_fini - free the MEC HPD EOP buffer object allocated by
 * gfx_v8_0_mec_init() (safe if it was never created).
 */
1430 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1432 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
/*
 * gfx_v8_0_mec_init - acquire the compute queues this driver owns and
 * allocate/zero one GTT buffer holding a GFX8_MEC_HPD_SIZE (2 KiB) HPD
 * EOP region per enabled compute ring, then unmap/unreserve the BO.
 * NOTE(review): the "if (r)" guard and return lines are missing from
 * this non-contiguous listing; `hpd` (the CPU mapping, presumably
 * returned by amdgpu_bo_create_reserved()) is declared on a line not
 * visible here.
 */
1433 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1437 size_t mec_hpd_size;
1439 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1441 /* take ownership of the relevant compute queues */
1442 amdgpu_gfx_compute_queue_acquire(adev);
1444 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1446 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1447 AMDGPU_GEM_DOMAIN_GTT,
1448 &adev->gfx.mec.hpd_eop_obj,
1449 &adev->gfx.mec.hpd_eop_gpu_addr,
1452 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1456 memset(hpd, 0, mec_hpd_size);
1458 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1459 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/*
 * vgpr_init_compute_shader - raw GCN3 machine code for a tiny compute
 * shader used by gfx_v8_0_do_edc_gpr_workarounds() to initialize VGPRs
 * before enabling EDC.  The repeated 0x7eXX02XX words look like
 * v_mov_b32 encodings cycling through VGPRs; the final pair is
 * presumably s_barrier (0xbf8a0000) + s_endpgm (0xbf810000) — confirm
 * against the GCN3 ISA manual.
 */
1464 static const u32 vgpr_init_compute_shader[] =
1466 0x7e000209, 0x7e020208,
1467 0x7e040207, 0x7e060206,
1468 0x7e080205, 0x7e0a0204,
1469 0x7e0c0203, 0x7e0e0202,
1470 0x7e100201, 0x7e120200,
1471 0x7e140209, 0x7e160208,
1472 0x7e180207, 0x7e1a0206,
1473 0x7e1c0205, 0x7e1e0204,
1474 0x7e200203, 0x7e220202,
1475 0x7e240201, 0x7e260200,
1476 0x7e280209, 0x7e2a0208,
1477 0x7e2c0207, 0x7e2e0206,
1478 0x7e300205, 0x7e320204,
1479 0x7e340203, 0x7e360202,
1480 0x7e380201, 0x7e3a0200,
1481 0x7e3c0209, 0x7e3e0208,
1482 0x7e400207, 0x7e420206,
1483 0x7e440205, 0x7e460204,
1484 0x7e480203, 0x7e4a0202,
1485 0x7e4c0201, 0x7e4e0200,
1486 0x7e500209, 0x7e520208,
1487 0x7e540207, 0x7e560206,
1488 0x7e580205, 0x7e5a0204,
1489 0x7e5c0203, 0x7e5e0202,
1490 0x7e600201, 0x7e620200,
1491 0x7e640209, 0x7e660208,
1492 0x7e680207, 0x7e6a0206,
1493 0x7e6c0205, 0x7e6e0204,
1494 0x7e700203, 0x7e720202,
1495 0x7e740201, 0x7e760200,
1496 0x7e780209, 0x7e7a0208,
1497 0x7e7c0207, 0x7e7e0206,
1498 0xbf8a0000, 0xbf810000,
/*
 * sgpr_init_compute_shader - raw GCN3 machine code for the companion
 * shader that initializes SGPRs for the EDC workaround dispatches
 * (used for both the sgpr1 and sgpr2 register setups below).  The
 * 0xbeXX01XX words look like s_mov_b32 encodings; the tail is
 * presumably s_barrier/s_endpgm plus padding — confirm against the
 * GCN3 ISA manual.
 */
1501 static const u32 sgpr_init_compute_shader[] =
1503 0xbe8a0100, 0xbe8c0102,
1504 0xbe8e0104, 0xbe900106,
1505 0xbe920108, 0xbe940100,
1506 0xbe960102, 0xbe980104,
1507 0xbe9a0106, 0xbe9c0108,
1508 0xbe9e0100, 0xbea00102,
1509 0xbea20104, 0xbea40106,
1510 0xbea60108, 0xbea80100,
1511 0xbeaa0102, 0xbeac0104,
1512 0xbeae0106, 0xbeb00108,
1513 0xbeb20100, 0xbeb40102,
1514 0xbeb60104, 0xbeb80106,
1515 0xbeba0108, 0xbebc0100,
1516 0xbebe0102, 0xbec00104,
1517 0xbec20106, 0xbec40108,
1518 0xbec60100, 0xbec80102,
1519 0xbee60004, 0xbee70005,
1520 0xbeea0006, 0xbeeb0007,
1521 0xbee80008, 0xbee90009,
1522 0xbefc0000, 0xbf8a0000,
1523 0xbf810000, 0x00000000,
/*
 * vgpr_init_regs - {register, value} pairs programmed via
 * PACKET3_SET_SH_REG before the VGPR-init dispatch of the EDC
 * workaround (all CUs of SE0 enabled, 1024 threads in X).
 */
1526 static const u32 vgpr_init_regs[] =
1528 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1529 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1530 mmCOMPUTE_NUM_THREAD_X, 256*4,
1531 mmCOMPUTE_NUM_THREAD_Y, 1,
1532 mmCOMPUTE_NUM_THREAD_Z, 1,
1533 mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1534 mmCOMPUTE_PGM_RSRC2, 20,
1535 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1536 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1537 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1538 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1539 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1540 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1541 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1542 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1543 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1544 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * sgpr1_init_regs - {register, value} pairs for the first SGPR-init
 * dispatch of the EDC workaround; targets the lower half of SE0's CUs
 * (STATIC_THREAD_MGMT_SE0 = 0x0f).
 */
1547 static const u32 sgpr1_init_regs[] =
1549 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1550 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1551 mmCOMPUTE_NUM_THREAD_X, 256*5,
1552 mmCOMPUTE_NUM_THREAD_Y, 1,
1553 mmCOMPUTE_NUM_THREAD_Z, 1,
1554 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1555 mmCOMPUTE_PGM_RSRC2, 20,
1556 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1557 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1558 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1559 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1560 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1561 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1562 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1563 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1564 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1565 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * sgpr2_init_regs - {register, value} pairs for the second SGPR-init
 * dispatch of the EDC workaround; targets the upper half of SE0's CUs
 * (STATIC_THREAD_MGMT_SE0 = 0xf0), otherwise identical to sgpr1.
 */
1568 static const u32 sgpr2_init_regs[] =
1570 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1571 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1572 mmCOMPUTE_NUM_THREAD_X, 256*5,
1573 mmCOMPUTE_NUM_THREAD_Y, 1,
1574 mmCOMPUTE_NUM_THREAD_Z, 1,
1575 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1576 mmCOMPUTE_PGM_RSRC2, 20,
1577 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1578 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1579 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1580 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1581 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1582 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1583 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1584 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1585 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1586 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * sec_ded_counter_registers - SEC/DED (single-error-correct /
 * double-error-detect) counter registers that are read back at the end
 * of gfx_v8_0_do_edc_gpr_workarounds() to clear their counts.
 * NOTE(review): the listing is non-contiguous — additional entries
 * exist at the original line numbers skipped here.
 */
1589 static const u32 sec_ded_counter_registers[] =
1592 mmCPC_EDC_SCRATCH_CNT,
1593 mmCPC_EDC_UCODE_CNT,
1600 mmDC_EDC_CSINVOC_CNT,
1601 mmDC_EDC_RESTORE_CNT,
1607 mmSQC_ATC_EDC_GATCL1_CNT,
1613 mmTCP_ATC_EDC_GATCL1_CNT,
/*
 * gfx_v8_0_do_edc_gpr_workarounds - Carrizo-only EDC enablement
 * sequence.  Before turning on GPR error detection/correction, every
 * VGPR and SGPR must hold a known value, so this builds one IB that
 * performs three compute dispatches (VGPR init, then SGPR init on each
 * half of SE0's CUs), each followed by a CS-partial-flush event; waits
 * for the fence; programs GB_EDC_MODE (DED_MODE=2, PROP_FED=1) and
 * CC_GC_EDC_CONFIG; and finally reads back all SEC/DED counters to
 * clear them.
 * NOTE(review): this listing is non-contiguous — early returns,
 * "if (r)" guards, the total_size accumulation lines and goto labels
 * are not all visible.
 */
1618 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1620 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1621 struct amdgpu_ib ib;
1622 struct dma_fence *f = NULL;
1625 unsigned total_size, vgpr_offset, sgpr_offset;
1628 /* only supported on CZ */
1629 if (adev->asic_type != CHIP_CARRIZO)
1632 /* bail if the compute ring is not ready */
/* Disable EDC while the init shaders run; old mode saved in tmp. */
1636 tmp = RREG32(mmGB_EDC_MODE);
1637 WREG32(mmGB_EDC_MODE, 0);
/*
 * IB size budget: per dispatch, 3 dwords per reg pair plus
 * 4 (PGM_LO/HI) + 5 (DISPATCH_DIRECT) + 2 (EVENT_WRITE) dwords,
 * then 256-byte-aligned space for the two shader binaries.
 */
1640 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1642 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1644 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1645 total_size = ALIGN(total_size, 256);
1646 vgpr_offset = total_size;
1647 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1648 sgpr_offset = total_size;
1649 total_size += sizeof(sgpr_init_compute_shader);
1651 /* allocate an indirect buffer to put the commands in */
1652 memset(&ib, 0, sizeof(ib));
1653 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1655 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1659 /* load the compute shaders */
1660 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1661 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1663 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1664 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1666 /* init the ib length to 0 */
/* --- dispatch 1: VGPR init --- */
1670 /* write the register state for the compute dispatch */
1671 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1672 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1673 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1674 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1676 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1677 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1678 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1679 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1680 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1681 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1683 /* write dispatch packet */
1684 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1685 ib.ptr[ib.length_dw++] = 8; /* x */
1686 ib.ptr[ib.length_dw++] = 1; /* y */
1687 ib.ptr[ib.length_dw++] = 1; /* z */
1688 ib.ptr[ib.length_dw++] =
1689 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1691 /* write CS partial flush packet */
1692 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1693 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* --- dispatch 2: SGPR init, first CU half --- */
1696 /* write the register state for the compute dispatch */
1697 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1698 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1699 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1700 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1702 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1703 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1704 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1705 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1706 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1707 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1709 /* write dispatch packet */
1710 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1711 ib.ptr[ib.length_dw++] = 8; /* x */
1712 ib.ptr[ib.length_dw++] = 1; /* y */
1713 ib.ptr[ib.length_dw++] = 1; /* z */
1714 ib.ptr[ib.length_dw++] =
1715 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1717 /* write CS partial flush packet */
1718 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1719 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* --- dispatch 3: SGPR init, second CU half (same shader) --- */
1722 /* write the register state for the compute dispatch */
1723 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1724 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1725 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1726 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1728 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1729 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1730 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1731 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1732 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1733 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1735 /* write dispatch packet */
1736 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1737 ib.ptr[ib.length_dw++] = 8; /* x */
1738 ib.ptr[ib.length_dw++] = 1; /* y */
1739 ib.ptr[ib.length_dw++] = 1; /* z */
1740 ib.ptr[ib.length_dw++] =
1741 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1743 /* write CS partial flush packet */
1744 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1745 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1747 /* shedule the ib on the ring */
1748 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1750 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1754 /* wait for the GPU to finish processing the IB */
1755 r = dma_fence_wait(f, false);
1757 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
/* Re-enable EDC with double-error detection and error propagation. */
1761 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1762 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1763 WREG32(mmGB_EDC_MODE, tmp);
1765 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1766 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1767 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1770 /* read back registers to clear the counters */
1771 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1772 RREG32(sec_ded_counter_registers[i]);
1775 amdgpu_ib_free(adev, &ib, NULL);
1781 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1784 u32 mc_shared_chmap, mc_arb_ramcfg;
1785 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1789 switch (adev->asic_type) {
1791 adev->gfx.config.max_shader_engines = 1;
1792 adev->gfx.config.max_tile_pipes = 2;
1793 adev->gfx.config.max_cu_per_sh = 6;
1794 adev->gfx.config.max_sh_per_se = 1;
1795 adev->gfx.config.max_backends_per_se = 2;
1796 adev->gfx.config.max_texture_channel_caches = 2;
1797 adev->gfx.config.max_gprs = 256;
1798 adev->gfx.config.max_gs_threads = 32;
1799 adev->gfx.config.max_hw_contexts = 8;
1801 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1802 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1803 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1804 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1805 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1808 adev->gfx.config.max_shader_engines = 4;
1809 adev->gfx.config.max_tile_pipes = 16;
1810 adev->gfx.config.max_cu_per_sh = 16;
1811 adev->gfx.config.max_sh_per_se = 1;
1812 adev->gfx.config.max_backends_per_se = 4;
1813 adev->gfx.config.max_texture_channel_caches = 16;
1814 adev->gfx.config.max_gprs = 256;
1815 adev->gfx.config.max_gs_threads = 32;
1816 adev->gfx.config.max_hw_contexts = 8;
1818 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1819 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1820 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1821 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1822 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1824 case CHIP_POLARIS11:
1825 case CHIP_POLARIS12:
1826 ret = amdgpu_atombios_get_gfx_info(adev);
1829 adev->gfx.config.max_gprs = 256;
1830 adev->gfx.config.max_gs_threads = 32;
1831 adev->gfx.config.max_hw_contexts = 8;
1833 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1834 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1835 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1836 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1837 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1839 case CHIP_POLARIS10:
1841 ret = amdgpu_atombios_get_gfx_info(adev);
1844 adev->gfx.config.max_gprs = 256;
1845 adev->gfx.config.max_gs_threads = 32;
1846 adev->gfx.config.max_hw_contexts = 8;
1848 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1849 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1850 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1851 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1852 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1855 adev->gfx.config.max_shader_engines = 4;
1856 adev->gfx.config.max_tile_pipes = 8;
1857 adev->gfx.config.max_cu_per_sh = 8;
1858 adev->gfx.config.max_sh_per_se = 1;
1859 adev->gfx.config.max_backends_per_se = 2;
1860 adev->gfx.config.max_texture_channel_caches = 8;
1861 adev->gfx.config.max_gprs = 256;
1862 adev->gfx.config.max_gs_threads = 32;
1863 adev->gfx.config.max_hw_contexts = 8;
1865 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1866 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1867 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1868 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1869 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1872 adev->gfx.config.max_shader_engines = 1;
1873 adev->gfx.config.max_tile_pipes = 2;
1874 adev->gfx.config.max_sh_per_se = 1;
1875 adev->gfx.config.max_backends_per_se = 2;
1876 adev->gfx.config.max_cu_per_sh = 8;
1877 adev->gfx.config.max_texture_channel_caches = 2;
1878 adev->gfx.config.max_gprs = 256;
1879 adev->gfx.config.max_gs_threads = 32;
1880 adev->gfx.config.max_hw_contexts = 8;
1882 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1883 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1884 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1885 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1886 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1889 adev->gfx.config.max_shader_engines = 1;
1890 adev->gfx.config.max_tile_pipes = 2;
1891 adev->gfx.config.max_sh_per_se = 1;
1892 adev->gfx.config.max_backends_per_se = 1;
1893 adev->gfx.config.max_cu_per_sh = 3;
1894 adev->gfx.config.max_texture_channel_caches = 2;
1895 adev->gfx.config.max_gprs = 256;
1896 adev->gfx.config.max_gs_threads = 16;
1897 adev->gfx.config.max_hw_contexts = 8;
1899 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1900 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1901 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1902 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1903 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1906 adev->gfx.config.max_shader_engines = 2;
1907 adev->gfx.config.max_tile_pipes = 4;
1908 adev->gfx.config.max_cu_per_sh = 2;
1909 adev->gfx.config.max_sh_per_se = 1;
1910 adev->gfx.config.max_backends_per_se = 2;
1911 adev->gfx.config.max_texture_channel_caches = 4;
1912 adev->gfx.config.max_gprs = 256;
1913 adev->gfx.config.max_gs_threads = 32;
1914 adev->gfx.config.max_hw_contexts = 8;
1916 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1917 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1918 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1919 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1920 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1924 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1925 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1926 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1928 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1929 adev->gfx.config.mem_max_burst_length_bytes = 256;
1930 if (adev->flags & AMD_IS_APU) {
1931 /* Get memory bank mapping mode. */
1932 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1933 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1934 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1936 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1937 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1938 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1940 /* Validate settings in case only one DIMM installed. */
1941 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1942 dimm00_addr_map = 0;
1943 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1944 dimm01_addr_map = 0;
1945 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1946 dimm10_addr_map = 0;
1947 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1948 dimm11_addr_map = 0;
1950 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1951 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1952 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1953 adev->gfx.config.mem_row_size_in_kb = 2;
1955 adev->gfx.config.mem_row_size_in_kb = 1;
1957 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1958 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1959 if (adev->gfx.config.mem_row_size_in_kb > 4)
1960 adev->gfx.config.mem_row_size_in_kb = 4;
1963 adev->gfx.config.shader_engine_tile_size = 32;
1964 adev->gfx.config.num_gpus = 1;
1965 adev->gfx.config.multi_gpu_tile_size = 64;
1967 /* fix up row size */
1968 switch (adev->gfx.config.mem_row_size_in_kb) {
1971 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1974 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1977 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1980 adev->gfx.config.gb_addr_config = gb_addr_config;
1985 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1986 int mec, int pipe, int queue)
1990 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1992 ring = &adev->gfx.compute_ring[ring_id];
1997 ring->queue = queue;
1999 ring->ring_obj = NULL;
2000 ring->use_doorbell = true;
2001 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
2002 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2003 + (ring_id * GFX8_MEC_HPD_SIZE);
2004 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2006 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2007 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2010 /* type-2 packets are deprecated on MEC, use type-3 instead */
2011 r = amdgpu_ring_init(adev, ring, 1024,
2012 &adev->gfx.eop_irq, irq_type);
2020 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
2022 static int gfx_v8_0_sw_init(void *handle)
2024 int i, j, k, r, ring_id;
2025 struct amdgpu_ring *ring;
2026 struct amdgpu_kiq *kiq;
2027 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2029 switch (adev->asic_type) {
2033 case CHIP_POLARIS10:
2034 case CHIP_POLARIS11:
2035 case CHIP_POLARIS12:
2037 adev->gfx.mec.num_mec = 2;
2042 adev->gfx.mec.num_mec = 1;
2046 adev->gfx.mec.num_pipe_per_mec = 4;
2047 adev->gfx.mec.num_queue_per_pipe = 8;
2050 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
2055 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
2059 /* Privileged reg */
2060 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
2061 &adev->gfx.priv_reg_irq);
2065 /* Privileged inst */
2066 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
2067 &adev->gfx.priv_inst_irq);
2071 /* Add CP EDC/ECC irq */
2072 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 197,
2073 &adev->gfx.cp_ecc_error_irq);
2077 /* SQ interrupts. */
2078 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 239,
2081 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
2085 INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
2087 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2089 gfx_v8_0_scratch_init(adev);
2091 r = gfx_v8_0_init_microcode(adev);
2093 DRM_ERROR("Failed to load gfx firmware!\n");
2097 r = gfx_v8_0_rlc_init(adev);
2099 DRM_ERROR("Failed to init rlc BOs!\n");
2103 r = gfx_v8_0_mec_init(adev);
2105 DRM_ERROR("Failed to init MEC BOs!\n");
2109 /* set up the gfx ring */
2110 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2111 ring = &adev->gfx.gfx_ring[i];
2112 ring->ring_obj = NULL;
2113 sprintf(ring->name, "gfx");
2114 /* no gfx doorbells on iceland */
2115 if (adev->asic_type != CHIP_TOPAZ) {
2116 ring->use_doorbell = true;
2117 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2120 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2121 AMDGPU_CP_IRQ_GFX_EOP);
2127 /* set up the compute queues - allocate horizontally across pipes */
2129 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2130 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2131 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2132 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2135 r = gfx_v8_0_compute_ring_init(adev,
2146 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2148 DRM_ERROR("Failed to init KIQ BOs!\n");
2152 kiq = &adev->gfx.kiq;
2153 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2157 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2158 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2162 /* reserve GDS, GWS and OA resource for gfx */
2163 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2164 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2165 &adev->gds.gds_gfx_bo, NULL, NULL);
2169 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2170 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2171 &adev->gds.gws_gfx_bo, NULL, NULL);
2175 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2176 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2177 &adev->gds.oa_gfx_bo, NULL, NULL);
2181 adev->gfx.ce_ram_size = 0x8000;
2183 r = gfx_v8_0_gpu_early_init(adev);
2190 static int gfx_v8_0_sw_fini(void *handle)
2193 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2195 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2196 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2197 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2199 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2200 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2201 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2202 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2204 amdgpu_gfx_compute_mqd_sw_fini(adev);
2205 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2206 amdgpu_gfx_kiq_fini(adev);
2208 gfx_v8_0_mec_fini(adev);
2209 gfx_v8_0_rlc_fini(adev);
2210 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2211 &adev->gfx.rlc.clear_state_gpu_addr,
2212 (void **)&adev->gfx.rlc.cs_ptr);
2213 if ((adev->asic_type == CHIP_CARRIZO) ||
2214 (adev->asic_type == CHIP_STONEY)) {
2215 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2216 &adev->gfx.rlc.cp_table_gpu_addr,
2217 (void **)&adev->gfx.rlc.cp_table_ptr);
2219 gfx_v8_0_free_microcode(adev);
2224 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2226 uint32_t *modearray, *mod2array;
2227 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2228 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2231 modearray = adev->gfx.config.tile_mode_array;
2232 mod2array = adev->gfx.config.macrotile_mode_array;
2234 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2235 modearray[reg_offset] = 0;
2237 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2238 mod2array[reg_offset] = 0;
2240 switch (adev->asic_type) {
2242 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243 PIPE_CONFIG(ADDR_SURF_P2) |
2244 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2245 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2246 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2247 PIPE_CONFIG(ADDR_SURF_P2) |
2248 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2249 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2250 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2251 PIPE_CONFIG(ADDR_SURF_P2) |
2252 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2253 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2254 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2255 PIPE_CONFIG(ADDR_SURF_P2) |
2256 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2257 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2258 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2259 PIPE_CONFIG(ADDR_SURF_P2) |
2260 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2261 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2262 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2263 PIPE_CONFIG(ADDR_SURF_P2) |
2264 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2265 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2266 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2267 PIPE_CONFIG(ADDR_SURF_P2) |
2268 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2269 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2270 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2271 PIPE_CONFIG(ADDR_SURF_P2));
2272 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2273 PIPE_CONFIG(ADDR_SURF_P2) |
2274 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2277 PIPE_CONFIG(ADDR_SURF_P2) |
2278 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2279 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2280 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2281 PIPE_CONFIG(ADDR_SURF_P2) |
2282 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2284 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2285 PIPE_CONFIG(ADDR_SURF_P2) |
2286 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2287 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2288 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2289 PIPE_CONFIG(ADDR_SURF_P2) |
2290 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2291 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2292 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2293 PIPE_CONFIG(ADDR_SURF_P2) |
2294 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2295 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2296 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2297 PIPE_CONFIG(ADDR_SURF_P2) |
2298 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2299 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2300 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2301 PIPE_CONFIG(ADDR_SURF_P2) |
2302 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2303 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2304 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2305 PIPE_CONFIG(ADDR_SURF_P2) |
2306 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2307 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2308 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2309 PIPE_CONFIG(ADDR_SURF_P2) |
2310 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2311 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2312 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2313 PIPE_CONFIG(ADDR_SURF_P2) |
2314 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2315 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2316 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2317 PIPE_CONFIG(ADDR_SURF_P2) |
2318 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2319 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2320 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2321 PIPE_CONFIG(ADDR_SURF_P2) |
2322 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2323 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2324 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2325 PIPE_CONFIG(ADDR_SURF_P2) |
2326 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2327 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2328 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2329 PIPE_CONFIG(ADDR_SURF_P2) |
2330 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2332 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2333 PIPE_CONFIG(ADDR_SURF_P2) |
2334 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2335 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2336 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 PIPE_CONFIG(ADDR_SURF_P2) |
2338 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2339 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2340 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2341 PIPE_CONFIG(ADDR_SURF_P2) |
2342 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2345 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2346 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2348 NUM_BANKS(ADDR_SURF_8_BANK));
2349 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352 NUM_BANKS(ADDR_SURF_8_BANK));
2353 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2354 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2355 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2356 NUM_BANKS(ADDR_SURF_8_BANK));
2357 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2360 NUM_BANKS(ADDR_SURF_8_BANK));
2361 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2363 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364 NUM_BANKS(ADDR_SURF_8_BANK));
2365 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2366 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2367 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2368 NUM_BANKS(ADDR_SURF_8_BANK));
2369 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2370 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2371 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2372 NUM_BANKS(ADDR_SURF_8_BANK));
2373 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2374 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2375 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2376 NUM_BANKS(ADDR_SURF_16_BANK));
2377 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2378 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2379 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2380 NUM_BANKS(ADDR_SURF_16_BANK));
2381 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2382 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2383 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2384 NUM_BANKS(ADDR_SURF_16_BANK));
2385 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2386 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2387 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2388 NUM_BANKS(ADDR_SURF_16_BANK));
2389 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2390 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2391 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2392 NUM_BANKS(ADDR_SURF_16_BANK));
2393 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2394 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2395 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2396 NUM_BANKS(ADDR_SURF_16_BANK));
2397 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2399 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400 NUM_BANKS(ADDR_SURF_8_BANK));
2402 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2403 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2405 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2407 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2408 if (reg_offset != 7)
2409 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2414 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2415 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2417 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2418 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2419 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2421 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2422 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2423 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2425 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2426 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2427 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2429 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2430 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2433 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2434 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2435 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2436 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2437 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2438 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2440 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2441 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2442 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2443 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2444 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2445 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2446 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2447 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2448 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2451 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2452 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2453 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2455 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2456 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2457 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2459 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2460 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2461 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2462 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2463 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2464 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2465 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2468 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2469 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2470 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2473 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2474 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2476 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2477 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2478 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2480 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2481 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2482 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2484 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2485 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2486 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2488 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2489 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2490 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2492 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2493 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2494 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2496 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2497 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2498 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2499 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2500 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2501 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2502 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2503 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2504 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2505 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2506 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2507 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2508 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2509 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2510 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2511 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2512 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2513 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2514 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2515 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2516 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2517 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2518 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2519 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2520 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2521 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2522 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2523 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2524 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2526 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2527 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2528 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2529 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2530 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2531 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2532 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2533 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2534 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2535 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2537 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2539 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2540 NUM_BANKS(ADDR_SURF_8_BANK));
2541 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2542 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2543 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2544 NUM_BANKS(ADDR_SURF_8_BANK));
2545 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2547 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548 NUM_BANKS(ADDR_SURF_8_BANK));
2549 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2551 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2552 NUM_BANKS(ADDR_SURF_8_BANK));
2553 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2555 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2556 NUM_BANKS(ADDR_SURF_8_BANK));
2557 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2560 NUM_BANKS(ADDR_SURF_8_BANK));
2561 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2563 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2564 NUM_BANKS(ADDR_SURF_8_BANK));
2565 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2567 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2568 NUM_BANKS(ADDR_SURF_8_BANK));
2569 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2571 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2572 NUM_BANKS(ADDR_SURF_8_BANK));
2573 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2575 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2576 NUM_BANKS(ADDR_SURF_8_BANK));
2577 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2579 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2580 NUM_BANKS(ADDR_SURF_8_BANK));
2581 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2583 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2584 NUM_BANKS(ADDR_SURF_8_BANK));
2585 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2587 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2588 NUM_BANKS(ADDR_SURF_8_BANK));
2589 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2591 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2592 NUM_BANKS(ADDR_SURF_4_BANK));
2594 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2595 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2597 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2598 if (reg_offset != 7)
2599 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2603 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2605 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2606 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2607 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2608 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2610 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2611 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2612 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2613 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2614 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2615 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2617 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2618 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2619 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2620 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2622 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2623 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2624 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2626 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2627 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2628 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2630 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2631 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2632 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2633 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2634 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2635 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2636 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2637 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2638 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2639 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2640 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2641 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2644 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2645 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2646 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2647 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2648 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2649 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2650 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2652 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2653 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2654 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2656 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2659 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2660 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2661 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2662 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2663 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2664 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2665 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2666 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2667 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2668 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2669 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2670 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2672 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2673 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2674 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2675 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2676 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2677 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2678 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2680 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2681 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2682 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2683 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2684 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2685 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2686 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2687 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2688 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2689 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2690 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2692 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2693 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2694 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2696 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2697 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2698 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2699 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2700 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2701 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2702 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2703 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2704 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2705 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2706 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2707 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2708 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2709 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2710 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2711 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2712 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2713 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2715 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2716 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2717 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2718 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2719 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2720 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2721 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2722 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2724 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2726 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2729 NUM_BANKS(ADDR_SURF_16_BANK));
2730 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2732 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2733 NUM_BANKS(ADDR_SURF_16_BANK));
2734 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2736 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2737 NUM_BANKS(ADDR_SURF_16_BANK));
2738 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2741 NUM_BANKS(ADDR_SURF_16_BANK));
2742 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2743 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2744 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2745 NUM_BANKS(ADDR_SURF_16_BANK));
2746 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2748 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2749 NUM_BANKS(ADDR_SURF_16_BANK));
2750 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2752 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2753 NUM_BANKS(ADDR_SURF_16_BANK));
2754 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2755 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2756 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2757 NUM_BANKS(ADDR_SURF_16_BANK));
2758 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2759 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2760 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2761 NUM_BANKS(ADDR_SURF_16_BANK));
2762 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2764 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2765 NUM_BANKS(ADDR_SURF_16_BANK));
2766 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2768 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2769 NUM_BANKS(ADDR_SURF_16_BANK));
2770 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2772 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2773 NUM_BANKS(ADDR_SURF_8_BANK));
2774 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2776 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2777 NUM_BANKS(ADDR_SURF_4_BANK));
2778 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2779 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2780 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2781 NUM_BANKS(ADDR_SURF_4_BANK));
2783 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2784 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2786 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2787 if (reg_offset != 7)
2788 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2791 case CHIP_POLARIS11:
2792 case CHIP_POLARIS12:
2793 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2794 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2796 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2797 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2800 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2801 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2802 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2803 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2804 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2805 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2806 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2808 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2809 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2810 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2812 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2813 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2814 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2816 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2817 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2818 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2819 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2820 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2821 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2822 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2824 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2825 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2826 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2827 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2828 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2829 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2830 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2831 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2832 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2834 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2835 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2836 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2837 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2838 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2839 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2840 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2841 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2842 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2843 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2844 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2846 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2848 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2850 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2852 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2854 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2856 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2859 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2860 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2863 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2864 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2866 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2867 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2868 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2869 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2870 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2871 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2872 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2873 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2875 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2876 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2877 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2878 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2879 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2880 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2881 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2882 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2883 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2884 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2885 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2886 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2887 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2888 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2889 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2890 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2891 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2892 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2893 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2894 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2895 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2896 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2898 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2899 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2900 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2902 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2903 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2905 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2906 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2907 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2908 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2909 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2910 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2911 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2912 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2914 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2916 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2917 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2918 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2919 NUM_BANKS(ADDR_SURF_16_BANK));
2921 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2922 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2923 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2924 NUM_BANKS(ADDR_SURF_16_BANK));
2926 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2928 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929 NUM_BANKS(ADDR_SURF_16_BANK));
2931 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2932 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2933 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2934 NUM_BANKS(ADDR_SURF_16_BANK));
2936 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2937 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2938 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2939 NUM_BANKS(ADDR_SURF_16_BANK));
2941 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2942 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2943 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2944 NUM_BANKS(ADDR_SURF_16_BANK));
2946 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2947 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2948 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2949 NUM_BANKS(ADDR_SURF_16_BANK));
2951 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2952 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2953 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2954 NUM_BANKS(ADDR_SURF_16_BANK));
2956 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2957 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959 NUM_BANKS(ADDR_SURF_16_BANK));
2961 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2962 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2963 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2964 NUM_BANKS(ADDR_SURF_16_BANK));
2966 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2967 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2968 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2969 NUM_BANKS(ADDR_SURF_16_BANK));
2971 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2972 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2973 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2974 NUM_BANKS(ADDR_SURF_16_BANK));
2976 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2979 NUM_BANKS(ADDR_SURF_8_BANK));
2981 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2982 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2983 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2984 NUM_BANKS(ADDR_SURF_4_BANK));
2986 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2987 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2989 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2990 if (reg_offset != 7)
2991 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2994 case CHIP_POLARIS10:
2995 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2996 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2997 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2998 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2999 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3000 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3001 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3002 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3003 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3004 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3005 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3006 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3007 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3008 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3009 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3010 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3011 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3012 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3014 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3015 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3016 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3018 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3019 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3020 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3021 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3022 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3023 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3024 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3025 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3026 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3027 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3028 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3029 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3030 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3031 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3032 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3033 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3034 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3035 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3036 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3037 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3038 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3039 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3040 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3041 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3042 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3043 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3044 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3045 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3046 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3047 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3049 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3050 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3051 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3052 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3053 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3054 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3055 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3057 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3058 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3059 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3060 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3061 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3062 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3063 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3064 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3065 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3066 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3067 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3068 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3069 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3070 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3071 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3073 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3074 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3075 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3077 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3078 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3079 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3080 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3081 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3082 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3083 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3085 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3086 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3087 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3088 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3089 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3090 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3091 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3093 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3094 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3095 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3096 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3097 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3098 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3099 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3100 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3101 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3102 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3103 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3105 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3107 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3108 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3109 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3110 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3111 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3112 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3113 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3114 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3115 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3116 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3118 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3119 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3120 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3121 NUM_BANKS(ADDR_SURF_16_BANK));
3123 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3124 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3125 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3126 NUM_BANKS(ADDR_SURF_16_BANK));
3128 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3129 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3130 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3131 NUM_BANKS(ADDR_SURF_16_BANK));
3133 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3134 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3135 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3136 NUM_BANKS(ADDR_SURF_16_BANK));
3138 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3139 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3140 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3141 NUM_BANKS(ADDR_SURF_16_BANK));
3143 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3144 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3145 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3146 NUM_BANKS(ADDR_SURF_16_BANK));
3148 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3149 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3150 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3151 NUM_BANKS(ADDR_SURF_16_BANK));
3153 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3155 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3156 NUM_BANKS(ADDR_SURF_16_BANK));
3158 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3159 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3160 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3161 NUM_BANKS(ADDR_SURF_16_BANK));
3163 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3164 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3165 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3166 NUM_BANKS(ADDR_SURF_16_BANK));
3168 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3169 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3170 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3171 NUM_BANKS(ADDR_SURF_16_BANK));
3173 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3174 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3175 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3176 NUM_BANKS(ADDR_SURF_8_BANK));
3178 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3179 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3180 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3181 NUM_BANKS(ADDR_SURF_4_BANK));
3183 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3184 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3185 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3186 NUM_BANKS(ADDR_SURF_4_BANK));
3188 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3189 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3191 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3192 if (reg_offset != 7)
3193 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3197 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3198 PIPE_CONFIG(ADDR_SURF_P2) |
3199 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3200 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3201 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3202 PIPE_CONFIG(ADDR_SURF_P2) |
3203 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3204 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3205 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3206 PIPE_CONFIG(ADDR_SURF_P2) |
3207 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3208 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3209 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3210 PIPE_CONFIG(ADDR_SURF_P2) |
3211 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3212 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3213 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3214 PIPE_CONFIG(ADDR_SURF_P2) |
3215 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3216 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3217 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3218 PIPE_CONFIG(ADDR_SURF_P2) |
3219 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3220 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3221 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3222 PIPE_CONFIG(ADDR_SURF_P2) |
3223 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3224 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3225 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3226 PIPE_CONFIG(ADDR_SURF_P2));
3227 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3228 PIPE_CONFIG(ADDR_SURF_P2) |
3229 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3231 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3232 PIPE_CONFIG(ADDR_SURF_P2) |
3233 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3234 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3236 PIPE_CONFIG(ADDR_SURF_P2) |
3237 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3238 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3239 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3240 PIPE_CONFIG(ADDR_SURF_P2) |
3241 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3242 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3243 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3244 PIPE_CONFIG(ADDR_SURF_P2) |
3245 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3248 PIPE_CONFIG(ADDR_SURF_P2) |
3249 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3251 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3252 PIPE_CONFIG(ADDR_SURF_P2) |
3253 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3254 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3255 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3256 PIPE_CONFIG(ADDR_SURF_P2) |
3257 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3259 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3260 PIPE_CONFIG(ADDR_SURF_P2) |
3261 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3263 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3264 PIPE_CONFIG(ADDR_SURF_P2) |
3265 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3267 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3268 PIPE_CONFIG(ADDR_SURF_P2) |
3269 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3271 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3272 PIPE_CONFIG(ADDR_SURF_P2) |
3273 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3274 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3275 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3276 PIPE_CONFIG(ADDR_SURF_P2) |
3277 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3278 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3279 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3280 PIPE_CONFIG(ADDR_SURF_P2) |
3281 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3283 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3284 PIPE_CONFIG(ADDR_SURF_P2) |
3285 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3287 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3288 PIPE_CONFIG(ADDR_SURF_P2) |
3289 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3291 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292 PIPE_CONFIG(ADDR_SURF_P2) |
3293 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3295 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3296 PIPE_CONFIG(ADDR_SURF_P2) |
3297 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3300 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3301 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3302 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303 NUM_BANKS(ADDR_SURF_8_BANK));
3304 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3305 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3306 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 NUM_BANKS(ADDR_SURF_8_BANK));
3308 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3309 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3310 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3311 NUM_BANKS(ADDR_SURF_8_BANK));
3312 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3313 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3314 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3315 NUM_BANKS(ADDR_SURF_8_BANK));
3316 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3318 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3319 NUM_BANKS(ADDR_SURF_8_BANK));
3320 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3321 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3322 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3323 NUM_BANKS(ADDR_SURF_8_BANK));
3324 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327 NUM_BANKS(ADDR_SURF_8_BANK));
3328 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3329 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3330 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3331 NUM_BANKS(ADDR_SURF_16_BANK));
3332 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3333 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3334 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3335 NUM_BANKS(ADDR_SURF_16_BANK));
3336 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3337 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3338 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3339 NUM_BANKS(ADDR_SURF_16_BANK));
3340 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3341 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3342 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3343 NUM_BANKS(ADDR_SURF_16_BANK));
3344 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3345 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3346 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3347 NUM_BANKS(ADDR_SURF_16_BANK));
3348 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3349 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3350 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3351 NUM_BANKS(ADDR_SURF_16_BANK));
3352 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3353 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3354 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3355 NUM_BANKS(ADDR_SURF_8_BANK));
3357 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3358 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3360 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3362 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3363 if (reg_offset != 7)
3364 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3369 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3373 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3374 PIPE_CONFIG(ADDR_SURF_P2) |
3375 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3376 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3377 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3378 PIPE_CONFIG(ADDR_SURF_P2) |
3379 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3380 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3381 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3382 PIPE_CONFIG(ADDR_SURF_P2) |
3383 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3384 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3385 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3386 PIPE_CONFIG(ADDR_SURF_P2) |
3387 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3388 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3389 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3390 PIPE_CONFIG(ADDR_SURF_P2) |
3391 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3392 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3393 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3394 PIPE_CONFIG(ADDR_SURF_P2) |
3395 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3396 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3397 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3398 PIPE_CONFIG(ADDR_SURF_P2) |
3399 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3400 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3401 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3402 PIPE_CONFIG(ADDR_SURF_P2));
3403 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3404 PIPE_CONFIG(ADDR_SURF_P2) |
3405 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3406 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3407 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3408 PIPE_CONFIG(ADDR_SURF_P2) |
3409 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3410 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3411 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3412 PIPE_CONFIG(ADDR_SURF_P2) |
3413 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3414 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3415 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3416 PIPE_CONFIG(ADDR_SURF_P2) |
3417 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3418 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3419 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3420 PIPE_CONFIG(ADDR_SURF_P2) |
3421 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3422 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3423 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3424 PIPE_CONFIG(ADDR_SURF_P2) |
3425 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3427 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3428 PIPE_CONFIG(ADDR_SURF_P2) |
3429 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3431 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3432 PIPE_CONFIG(ADDR_SURF_P2) |
3433 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3435 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3436 PIPE_CONFIG(ADDR_SURF_P2) |
3437 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3439 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3440 PIPE_CONFIG(ADDR_SURF_P2) |
3441 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3443 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3444 PIPE_CONFIG(ADDR_SURF_P2) |
3445 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3446 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3447 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3448 PIPE_CONFIG(ADDR_SURF_P2) |
3449 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3450 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3451 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3452 PIPE_CONFIG(ADDR_SURF_P2) |
3453 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3454 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3455 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3456 PIPE_CONFIG(ADDR_SURF_P2) |
3457 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3458 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3459 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3460 PIPE_CONFIG(ADDR_SURF_P2) |
3461 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3462 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3463 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3464 PIPE_CONFIG(ADDR_SURF_P2) |
3465 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3467 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3468 PIPE_CONFIG(ADDR_SURF_P2) |
3469 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3470 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3471 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3472 PIPE_CONFIG(ADDR_SURF_P2) |
3473 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3474 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3476 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3479 NUM_BANKS(ADDR_SURF_8_BANK));
3480 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3481 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3482 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3483 NUM_BANKS(ADDR_SURF_8_BANK));
3484 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3485 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3486 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3487 NUM_BANKS(ADDR_SURF_8_BANK));
3488 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3489 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3490 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3491 NUM_BANKS(ADDR_SURF_8_BANK));
3492 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3493 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3494 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3495 NUM_BANKS(ADDR_SURF_8_BANK));
3496 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3497 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3498 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3499 NUM_BANKS(ADDR_SURF_8_BANK));
3500 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3501 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3502 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3503 NUM_BANKS(ADDR_SURF_8_BANK));
3504 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3505 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3506 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3507 NUM_BANKS(ADDR_SURF_16_BANK));
3508 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3511 NUM_BANKS(ADDR_SURF_16_BANK));
3512 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3513 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3514 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3515 NUM_BANKS(ADDR_SURF_16_BANK));
3516 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3517 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3518 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3519 NUM_BANKS(ADDR_SURF_16_BANK));
3520 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3523 NUM_BANKS(ADDR_SURF_16_BANK));
3524 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3525 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3526 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3527 NUM_BANKS(ADDR_SURF_16_BANK));
3528 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3531 NUM_BANKS(ADDR_SURF_8_BANK));
3533 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3534 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3536 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3538 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3539 if (reg_offset != 7)
3540 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
/*
 * Select which shader engine (SE), shader array (SH) and instance the
 * following register accesses target, by programming GRBM_GFX_INDEX.
 * Passing 0xffffffff for an argument requests broadcast writes to all
 * units at that level instead of indexing a single one.
 * NOTE(review): the "else" keywords/braces pairing the REG_SET_FIELD
 * lines below are not visible in this chunk — each if is presumably
 * followed by an else for the explicit-index case; confirm upstream.
 */
3546 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3547 u32 se_num, u32 sh_num, u32 instance)
/* instance: broadcast to all instances, or select one by index */
3551 if (instance == 0xffffffff)
3552 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3554 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
/* shader engine: broadcast or explicit SE_INDEX */
3556 if (se_num == 0xffffffff)
3557 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3559 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
/* shader array: broadcast or explicit SH_INDEX */
3561 if (sh_num == 0xffffffff)
3562 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3564 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
/* commit the selection */
3566 WREG32(mmGRBM_GFX_INDEX, data);
/*
 * Select a micro-engine/pipe/queue combination for subsequent register
 * accesses; thin wrapper that forwards to vi_srbm_select() with VMID 0.
 */
3569 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3570 u32 me, u32 pipe, u32 q)
3572 vi_srbm_select(adev, me, pipe, q, 0);
/*
 * Return a bitmap of the active (non-harvested) render backends for the
 * currently selected SE/SH: OR the fuse and user disable registers,
 * extract the BACKEND_DISABLE field, then invert and mask to the number
 * of backends per shader array.
 */
3575 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
/* combine hardware (CC) and user (GC_USER) RB disable bits */
3579 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3580 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3582 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
/* one mask bit per backend in this shader array */
3584 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3585 adev->gfx.config.max_sh_per_se);
/* disabled bits -> 0, so invert to get the active set */
3587 return (~data) & mask;
/*
 * Fill in the default PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1
 * values for the current ASIC via *rconf / *rconf1.
 * NOTE(review): several case labels and break statements are not
 * visible in this chunk; the groupings below follow the visible code.
 */
3591 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3593 switch (adev->asic_type) {
3596 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3597 RB_XSEL2(1) | PKR_MAP(2) |
3598 PKR_XSEL(1) | PKR_YSEL(1) |
3599 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3600 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3604 case CHIP_POLARIS10:
3605 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3606 SE_XSEL(1) | SE_YSEL(1);
3607 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
/* small parts only need the PKR0 RB mapping */
3612 *rconf |= RB_MAP_PKR0(2);
3615 case CHIP_POLARIS11:
3616 case CHIP_POLARIS12:
3617 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3618 SE_XSEL(1) | SE_YSEL(1);
/* unknown ASICs fall through to an error report */
3626 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
/*
 * Program per-SE raster configuration on parts with harvested (fused
 * off) render backends.  For each shader engine, start from the golden
 * raster_config and patch the SE/PKR/RB mapping fields so rasterizer
 * work is only routed to RBs present in @rb_mask, then write the result
 * through a per-SE GRBM_GFX_INDEX selection.  Restores broadcast
 * selection before returning.  Caller holds grbm_idx_mutex (see
 * gfx_v8_0_setup_rb).
 */
3632 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3633 u32 raster_config, u32 raster_config_1,
3634 unsigned rb_mask, unsigned num_rb)
3636 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3637 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3638 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3639 unsigned rb_per_se = num_rb / num_se;
3640 unsigned se_mask[4];
/* slice rb_mask into one contiguous RB group per shader engine */
3643 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3644 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3645 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3646 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
/* only these topologies are expected on gfx8 */
3648 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3649 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3650 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
/* with 4 SEs, re-map the SE pair field if a whole pair is harvested */
3652 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3653 (!se_mask[2] && !se_mask[3]))) {
3654 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3656 if (!se_mask[0] && !se_mask[1]) {
3658 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3661 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3665 for (se = 0; se < num_se; se++) {
3666 unsigned raster_config_se = raster_config;
3667 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3668 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3669 int idx = (se / 2) * 2;
/* re-map SE_MAP when one SE of this pair is fully harvested */
3671 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3672 raster_config_se &= ~SE_MAP_MASK;
3674 if (!se_mask[idx]) {
3675 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3677 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
/* re-map PKR_MAP when a whole packer's RBs are harvested */
3681 pkr0_mask &= rb_mask;
3682 pkr1_mask &= rb_mask;
3683 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3684 raster_config_se &= ~PKR_MAP_MASK;
3687 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3689 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
/* re-map individual RBs inside packer 0 (and packer 1 below) */
3693 if (rb_per_se >= 2) {
3694 unsigned rb0_mask = 1 << (se * rb_per_se);
3695 unsigned rb1_mask = rb0_mask << 1;
3697 rb0_mask &= rb_mask;
3698 rb1_mask &= rb_mask;
3699 if (!rb0_mask || !rb1_mask) {
3700 raster_config_se &= ~RB_MAP_PKR0_MASK;
3704 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3707 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3711 if (rb_per_se > 2) {
3712 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3713 rb1_mask = rb0_mask << 1;
3714 rb0_mask &= rb_mask;
3715 rb1_mask &= rb_mask;
3716 if (!rb0_mask || !rb1_mask) {
3717 raster_config_se &= ~RB_MAP_PKR1_MASK;
3721 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3724 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3730 /* GRBM_GFX_INDEX has a different offset on VI */
3731 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3732 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3733 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3736 /* GRBM_GFX_INDEX has a different offset on VI */
3737 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
/*
 * Discover the active render-backend bitmap across all SE/SH pairs,
 * program the (possibly harvested) raster configuration, and cache the
 * resulting per-SE/SH register values for userspace queries.
 * Serialized against other GRBM_GFX_INDEX users via grbm_idx_mutex.
 */
3740 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3744 u32 raster_config = 0, raster_config_1 = 0;
3746 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3747 adev->gfx.config.max_sh_per_se;
3748 unsigned num_rb_pipes;
/* walk every SE/SH and collect its active-RB bits into one bitmap */
3750 mutex_lock(&adev->grbm_idx_mutex);
3751 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3752 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3753 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3754 data = gfx_v8_0_get_rb_active_bitmap(adev);
3755 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3756 rb_bitmap_width_per_sh);
/* back to broadcast before touching global raster config */
3759 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3761 adev->gfx.config.backend_enable_mask = active_rbs;
3762 adev->gfx.config.num_rbs = hweight32(active_rbs);
3764 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3765 adev->gfx.config.max_shader_engines, 16);
3767 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
/* fully populated part: broadcast the golden config; otherwise
 * patch the mapping per-SE for the harvested RBs
 */
3769 if (!adev->gfx.config.backend_enable_mask ||
3770 adev->gfx.config.num_rbs >= num_rb_pipes) {
3771 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3772 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3774 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3775 adev->gfx.config.backend_enable_mask,
3779 /* cache the values for userspace */
3780 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3781 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3782 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3783 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3784 RREG32(mmCC_RB_BACKEND_DISABLE);
3785 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3786 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3787 adev->gfx.config.rb_config[i][j].raster_config =
3788 RREG32(mmPA_SC_RASTER_CONFIG);
3789 adev->gfx.config.rb_config[i][j].raster_config_1 =
3790 RREG32(mmPA_SC_RASTER_CONFIG_1);
3793 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3794 mutex_unlock(&adev->grbm_idx_mutex);
3798 * gfx_v8_0_init_compute_vmid - gart enable
3800 * @adev: amdgpu_device pointer
3802 * Initialize compute vmid sh_mem registers
/* aperture bases and the VMID range reserved for compute (KFD) */
3805 #define DEFAULT_SH_MEM_BASES (0x6000)
3806 #define FIRST_COMPUTE_VMID (8)
3807 #define LAST_COMPUTE_VMID (16)
/*
 * Program SH_MEM_* registers for every compute VMID (8..15) with the
 * HSA64 aperture layout below; selection is done through the SRBM under
 * srbm_mutex, and VMID 0 is re-selected afterwards.
 */
3808 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3811 uint32_t sh_mem_config;
3812 uint32_t sh_mem_bases;
3815 * Configure apertures:
3816 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3817 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3818 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
/* same base value for both the private and shared halves */
3820 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3822 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3823 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3824 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3825 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3826 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3827 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3829 mutex_lock(&adev->srbm_mutex);
3830 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3831 vi_srbm_select(adev, 0, 0, 0, i);
3832 /* CP and shaders */
3833 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
/* APE1 disabled: base > limit */
3834 WREG32(mmSH_MEM_APE1_BASE, 1);
3835 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3836 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3838 vi_srbm_select(adev, 0, 0, 0, 0);
3839 mutex_unlock(&adev->srbm_mutex);
/*
 * Set ASIC-dependent gfx config defaults; currently only whether the
 * part uses double off-chip LDS buffers.
 * NOTE(review): the case labels of this switch are not visible in this
 * chunk — confirm which ASICs take which branch upstream.
 */
3842 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3844 switch (adev->asic_type) {
3846 adev->gfx.config.double_offchip_lds_buf = 1;
3850 adev->gfx.config.double_offchip_lds_buf = 0;
/*
 * One-time gfx engine hardware init: address config, tiling tables,
 * render backends, CU info, per-VMID SH_MEM setup, compute VMIDs, and
 * broadcast PA_SC fifo sizes / SPI arbitration priorities.
 */
3855 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3857 u32 tmp, sh_static_mem_cfg;
3860 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
/* mirror gb_addr_config into the HDP and DMIF copies */
3861 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3862 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3863 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3865 gfx_v8_0_tiling_mode_table_init(adev);
3866 gfx_v8_0_setup_rb(adev);
3867 gfx_v8_0_get_cu_info(adev);
3868 gfx_v8_0_config_init(adev);
3870 /* XXX SH_MEM regs */
3871 /* where to put LDS, scratch, GPUVM in FSA64 space */
3872 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3874 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3876 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3878 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
/* program SH_MEM_CONFIG/BASES for every VMID; the two REG_SET_FIELD
 * sequences below presumably correspond to VMID 0 (MTYPE_UC) vs. the
 * other VMIDs (MTYPE_NC + shared aperture) — the branch lines are not
 * visible in this chunk.
 */
3880 mutex_lock(&adev->srbm_mutex);
3881 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3882 vi_srbm_select(adev, 0, 0, 0, i);
3883 /* CP and shaders */
3885 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3886 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3887 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3888 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3889 WREG32(mmSH_MEM_CONFIG, tmp);
3890 WREG32(mmSH_MEM_BASES, 0);
3892 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3893 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3894 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3895 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3896 WREG32(mmSH_MEM_CONFIG, tmp);
3897 tmp = adev->gmc.shared_aperture_start >> 48;
3898 WREG32(mmSH_MEM_BASES, tmp);
/* APE1 disabled: base > limit */
3901 WREG32(mmSH_MEM_APE1_BASE, 1);
3902 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3904 vi_srbm_select(adev, 0, 0, 0, 0);
3905 mutex_unlock(&adev->srbm_mutex);
3907 gfx_v8_0_init_compute_vmid(adev);
3909 mutex_lock(&adev->grbm_idx_mutex);
3911 * making sure that the following register writes will be broadcasted
3912 * to all the shaders
3914 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3916 WREG32(mmPA_SC_FIFO_SIZE,
3917 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3918 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3919 (adev->gfx.config.sc_prim_fifo_size_backend <<
3920 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3921 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3922 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3923 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3924 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
/* give all four pipe timestamp queues equal arbitration priority */
3926 tmp = RREG32(mmSPI_ARB_PRIORITY);
3927 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3928 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3929 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3930 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3931 WREG32(mmSPI_ARB_PRIORITY, tmp);
3933 mutex_unlock(&adev->grbm_idx_mutex);
/*
 * Poll until the RLC serdes masters report idle: first the CU master
 * per SE/SH (bounded by adev->usec_timeout each), then the non-CU
 * masters (SE/GC/TC0/TC1) globally.  Logs and bails out on timeout.
 */
3937 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3942 mutex_lock(&adev->grbm_idx_mutex);
3943 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3944 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3945 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3946 for (k = 0; k < adev->usec_timeout; k++) {
3947 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
/* timed out: restore broadcast selection before returning */
3951 if (k == adev->usec_timeout) {
3952 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3953 0xffffffff, 0xffffffff)&#59;
3954 mutex_unlock(&adev->grbm_idx_mutex);
3955 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3961 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3962 mutex_unlock(&adev->grbm_idx_mutex);
/* now wait for the non-CU serdes masters to go idle */
3964 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3965 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3966 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3967 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3968 for (k = 0; k < adev->usec_timeout; k++) {
3969 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/*
 * Enable or disable the four GUI-idle related interrupt sources
 * (context busy/empty, compute busy, gfx idle) in CP_INT_CNTL_RING0
 * with a read-modify-write.
 */
3975 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3978 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3980 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3981 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3982 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3983 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3985 WREG32(mmCP_INT_CNTL_RING0, tmp);
/*
 * Tell the RLC where the clear-state indirect buffer (CSIB) lives:
 * 64-bit GPU address split across HI/LO registers (low bits masked to
 * 4-byte alignment) plus its length.
 */
3988 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3991 WREG32(mmRLC_CSIB_ADDR_HI,
3992 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3993 WREG32(mmRLC_CSIB_ADDR_LO,
3994 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3995 WREG32(mmRLC_CSIB_LENGTH,
3996 adev->gfx.rlc.clear_state_size);
/*
 * Walk the firmware-provided indirect register list format, recording
 * the start offset of each entry and deduplicating register indices
 * into unique_indices[]; each list entry in register_list_format is
 * rewritten in place to refer to its position in unique_indices.
 * BUG_ON guards both output arrays against overflow.
 * NOTE(review): several parameter lines and loop-control lines of this
 * function are not visible in this chunk.
 */
3999 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
4002 int *unique_indices,
4005 int *ind_start_offsets,
4010 bool new_entry = true;
4012 for (; ind_offset < list_size; ind_offset++) {
/* record where this indirect entry starts */
4016 ind_start_offsets[*offset_count] = ind_offset;
4017 *offset_count = *offset_count + 1;
4018 BUG_ON(*offset_count >= max_offset);
/* 0xFFFFFFFF marks the end of an entry */
4021 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
4028 /* look for the matching indice */
4030 indices < *indices_count;
4032 if (unique_indices[indices] ==
4033 register_list_format[ind_offset])
/* not seen before: append to the unique index table */
4037 if (indices >= *indices_count) {
4038 unique_indices[*indices_count] =
4039 register_list_format[ind_offset];
4040 indices = *indices_count;
4041 *indices_count = *indices_count + 1;
4042 BUG_ON(*indices_count >= max_indices);
/* replace the raw register index with its unique-table slot */
4045 register_list_format[ind_offset] = indices;
/*
 * Upload the RLC save/restore register lists: parse the firmware's
 * indirect list into unique indices and start offsets, stream the
 * restore list into SRM ARAM, the (rewritten) format list and offsets
 * into GPM scratch, and program the index-control registers.
 * Works on a heap copy so the cached firmware list stays untouched.
 */
4049 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4052 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4053 int indices_count = 0;
4054 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4055 int offset_count = 0;
/* mutable scratch copy of the firmware register list format */
4058 unsigned int *register_list_format =
4059 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4060 if (!register_list_format)
4062 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4063 adev->gfx.rlc.reg_list_format_size_bytes);
4065 gfx_v8_0_parse_ind_reg_list(register_list_format,
4066 RLC_FormatDirectRegListLength,
4067 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4070 ARRAY_SIZE(unique_indices),
4071 indirect_start_offsets,
4073 ARRAY_SIZE(indirect_start_offsets));
4075 /* save and restore list */
4076 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4078 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4079 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4080 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
/* stream the rewritten format list into GPM scratch */
4083 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4084 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4085 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
/* restore-list size in register pairs (dwords / 2) */
4087 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4088 list_size = list_size >> 1;
4089 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4090 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4092 /* starting offsets starts */
4093 WREG32(mmRLC_GPM_SCRATCH_ADDR,
4094 adev->gfx.rlc.starting_offsets_start);
4095 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4096 WREG32(mmRLC_GPM_SCRATCH_DATA,
4097 indirect_start_offsets[i]);
4099 /* unique indices */
4100 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4101 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4102 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
/* split each non-zero entry into address (low) and data (high) */
4103 if (unique_indices[i] != 0) {
4104 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4105 WREG32(data + i, unique_indices[i] >> 20);
4108 kfree(register_list_format);
/* Turn on the RLC save/restore machine (SRM). */
4113 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4115 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
/*
 * Program the RLC power-gating timing parameters: WPTR poll idle count,
 * the four RLC_PG_DELAY stages, serdes command delay, and the GFX-idle
 * threshold for GRBM register save.
 */
4118 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4122 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4124 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4125 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4126 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4127 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4128 WREG32(mmRLC_PG_DELAY, data);
4130 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4131 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
/* Toggle SMU clock slow-down during power-up (Carrizo-family PG). */
4135 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4138 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
/* Toggle SMU clock slow-down during power-down (Carrizo-family PG). */
4141 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4144 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
/* Enable CP power gating; note the register field is a DISABLE bit,
 * hence the inverted value.
 */
4147 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4149 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
/*
 * Per-ASIC power-gating bring-up: CSB, save/restore list + machine,
 * and PG delay programming.  Carrizo/Stoney additionally load the RLC
 * jump table and the always-on CU mask; Polaris11/12/VegaM use the
 * common subset; other ASICs get nothing here.
 */
4152 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4154 if ((adev->asic_type == CHIP_CARRIZO) ||
4155 (adev->asic_type == CHIP_STONEY)) {
4156 gfx_v8_0_init_csb(adev);
4157 gfx_v8_0_init_save_restore_list(adev);
4158 gfx_v8_0_enable_save_restore_machine(adev);
4159 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4160 gfx_v8_0_init_power_gating(adev);
4161 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4162 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4163 (adev->asic_type == CHIP_POLARIS12) ||
4164 (adev->asic_type == CHIP_VEGAM)) {
4165 gfx_v8_0_init_csb(adev);
4166 gfx_v8_0_init_save_restore_list(adev);
4167 gfx_v8_0_enable_save_restore_machine(adev);
4168 gfx_v8_0_init_power_gating(adev);
/*
 * Halt the RLC F32 core, mask the GUI-idle interrupts, and wait for the
 * serdes masters to drain.
 */
4173 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4175 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4177 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4178 gfx_v8_0_wait_for_rlc_serdes(adev);
/* Pulse the GRBM soft-reset bit for the RLC (assert, then deassert). */
4181 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4183 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4186 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
/*
 * Start the RLC F32 core.  On dGPUs the GUI-idle interrupt is enabled
 * here; APUs (Carrizo) defer it until after CP init.
 */
4190 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4192 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4194 /* carrizo do enable cp interrupt after cp inited */
4195 if (!(adev->flags & AMD_IS_APU))
4196 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/*
 * Legacy (non-SMU) RLC microcode upload: stream the ucode words from
 * the firmware blob into RLC_GPM_UCODE_DATA with the address register
 * auto-pointing from 0, then write the fw version to the address reg.
 * Fails early if no RLC firmware was loaded.
 */
4201 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4203 const struct rlc_firmware_header_v2_0 *hdr;
4204 const __le32 *fw_data;
4205 unsigned i, fw_size;
4207 if (!adev->gfx.rlc_fw)
4210 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4211 amdgpu_ucode_print_rlc_hdr(&hdr->header);
/* ucode payload sits at the offset recorded in the header */
4213 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4214 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4215 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4217 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4218 for (i = 0; i < fw_size; i++)
4219 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4220 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
/*
 * Bring the RLC up: stop it, disable CGCG/CGLS clock gating (Polaris
 * parts also touch the 3D variant), clear PG control, reset, run the
 * power-gating init, optionally load microcode in direct-load mode,
 * then start the RLC.
 */
4225 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4230 gfx_v8_0_rlc_stop(adev);
/* disable coarse-grain clock gating while (re)initializing */
4233 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4234 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4235 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4236 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4237 if (adev->asic_type == CHIP_POLARIS11 ||
4238 adev->asic_type == CHIP_POLARIS10 ||
4239 adev->asic_type == CHIP_POLARIS12 ||
4240 adev->asic_type == CHIP_VEGAM) {
4241 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4243 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
/* power gating off while we reset */
4247 WREG32(mmRLC_PG_CNTL, 0);
4249 gfx_v8_0_rlc_reset(adev);
4250 gfx_v8_0_init_pg(adev);
/* only direct load needs a driver-side ucode upload */
4253 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4254 /* legacy rlc firmware loading */
4255 r = gfx_v8_0_rlc_load_microcode(adev);
4260 gfx_v8_0_rlc_start(adev);
/*
 * Halt or un-halt the three gfx CP micro-engines (ME, PFP, CE) via
 * CP_ME_CNTL.  When halting, also mark every gfx ring not ready so no
 * further submissions are attempted.
 */
4265 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4268 u32 tmp = RREG32(mmCP_ME_CNTL);
4271 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4272 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4273 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4275 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4276 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4277 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4278 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4279 adev->gfx.gfx_ring[i].ready = false;
4281 WREG32(mmCP_ME_CNTL, tmp);
/*
 * Direct-load the three gfx CP firmwares (PFP, CE, ME): halt the CP,
 * then for each engine stream its ucode words into the corresponding
 * UCODE_DATA/RAM_DATA register with the address reset to 0 first, and
 * finish by writing the firmware version to the address register.
 * Fails early if any of the three blobs is missing.
 */
4285 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4287 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4288 const struct gfx_firmware_header_v1_0 *ce_hdr;
4289 const struct gfx_firmware_header_v1_0 *me_hdr;
4290 const __le32 *fw_data;
4291 unsigned i, fw_size;
4293 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4296 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4297 adev->gfx.pfp_fw->data;
4298 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4299 adev->gfx.ce_fw->data;
4300 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4301 adev->gfx.me_fw->data;
4303 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4304 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4305 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
/* engines must be halted before ucode upload */
4307 gfx_v8_0_cp_gfx_enable(adev, false);
/* PFP (pre-fetch parser) */
4310 fw_data = (const __le32 *)
4311 (adev->gfx.pfp_fw->data +
4312 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4313 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4314 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4315 for (i = 0; i < fw_size; i++)
4316 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4317 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
/* CE (constant engine) */
4320 fw_data = (const __le32 *)
4321 (adev->gfx.ce_fw->data +
4322 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4323 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4324 WREG32(mmCP_CE_UCODE_ADDR, 0);
4325 for (i = 0; i < fw_size; i++)
4326 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4327 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
/* ME (micro engine) */
4330 fw_data = (const __le32 *)
4331 (adev->gfx.me_fw->data +
4332 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4333 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4334 WREG32(mmCP_ME_RAM_WADDR, 0);
4335 for (i = 0; i < fw_size; i++)
4336 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4337 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
/* Compute the size (in dwords) of the clear-state buffer commands emitted by
 * gfx_v8_0_cp_gfx_start(): preamble + context control + one SET_CONTEXT_REG
 * packet per SECT_CONTEXT extent in vi_cs_data, plus the raster-config pair
 * and the end-of-clear-state trailer.  Must stay in sync with that function.
 */
4342 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4345 const struct cs_section_def *sect = NULL;
4346 const struct cs_extent_def *ext = NULL;
4348 /* begin clear state */
4350 /* context control state */
4353 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4354 for (ext = sect->section; ext->extent != NULL; ++ext) {
4355 if (sect->id == SECT_CONTEXT)
/* 2 dwords of packet header/offset per extent plus its payload */
4356 count += 2 + ext->reg_count;
4361 /* pa_sc_raster_config/pa_sc_raster_config1 */
4363 /* end clear state */
/* Start the gfx CP: program basic CP config, un-halt the engines and submit
 * the initial clear-state / context-control packet stream on gfx ring 0.
 * The ring allocation size must match gfx_v8_0_get_csb_size() (+4 for the
 * CE partition SET_BASE packet).
 */
4371 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4373 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4374 const struct cs_section_def *sect = NULL;
4375 const struct cs_extent_def *ext = NULL;
4379 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4380 WREG32(mmCP_ENDIAN_SWAP, 0);
4381 WREG32(mmCP_DEVICE_ID, 1);
4383 gfx_v8_0_cp_gfx_enable(adev, true);
4385 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4387 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4391 /* clear state buffer */
4392 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4393 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4395 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4396 amdgpu_ring_write(ring, 0x80000000);
4397 amdgpu_ring_write(ring, 0x80000000);
/* emit every SECT_CONTEXT extent from the VI clear-state table */
4399 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4400 for (ext = sect->section; ext->extent != NULL; ++ext) {
4401 if (sect->id == SECT_CONTEXT) {
4402 amdgpu_ring_write(ring,
4403 PACKET3(PACKET3_SET_CONTEXT_REG,
4405 amdgpu_ring_write(ring,
4406 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4407 for (i = 0; i < ext->reg_count; i++)
4408 amdgpu_ring_write(ring, ext->extent[i]);
/* raster config from the harvested-RB configuration of SE0/SH0 */
4413 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4414 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4415 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4416 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4418 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4419 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4421 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4422 amdgpu_ring_write(ring, 0);
4424 /* init the CE partitions */
4425 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4426 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4427 amdgpu_ring_write(ring, 0x8000);
4428 amdgpu_ring_write(ring, 0x8000);
4430 amdgpu_ring_commit(ring);
/* Configure the CP gfx (CPG) doorbell for the given ring: program the
 * doorbell offset/enable in CP_RB_DOORBELL_CONTROL and, on dGPUs, the
 * doorbell aperture range registers.  Topaz has no gfx doorbells at all.
 */
4434 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4437 /* no gfx doorbells on iceland */
4438 if (adev->asic_type == CHIP_TOPAZ)
4441 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4443 if (ring->use_doorbell) {
4444 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4445 DOORBELL_OFFSET, ring->doorbell_index);
4446 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4448 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
/* ring not using doorbells: make sure the enable bit is off */
4451 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4454 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
/* range programming below applies to dGPUs only */
4456 if (adev->flags & AMD_IS_APU)
4459 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4460 DOORBELL_RANGE_LOWER,
4461 AMDGPU_DOORBELL_GFX_RING0);
4462 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4464 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4465 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
/* Bring up gfx ring 0: program ring-buffer size/pointers/addresses into the
 * CP_RB0_* registers, set up wptr polling and the doorbell, then kick the
 * initial clear-state stream and run a ring test.
 */
4468 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4470 struct amdgpu_ring *ring;
4473 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4476 /* Set the write pointer delay */
4477 WREG32(mmCP_RB_WPTR_DELAY, 0);
4479 /* set the RB to use vmid 0 */
4480 WREG32(mmCP_RB_VMID, 0);
4482 /* Set ring buffer size */
4483 ring = &adev->gfx.gfx_ring[0];
/* RB_BUFSZ encodes log2 of the size in units of 8 bytes */
4484 rb_bufsz = order_base_2(ring->ring_size / 8);
4485 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4486 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4487 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4488 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4490 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4492 WREG32(mmCP_RB0_CNTL, tmp);
4494 /* Initialize the ring buffer's read and write pointers */
4495 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4497 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4499 /* set the wb address wether it's enabled or not */
4500 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4501 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4502 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4504 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4505 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4506 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
/* re-write CNTL without RB_RPTR_WR_ENA to latch the pointer reset */
4508 WREG32(mmCP_RB0_CNTL, tmp);
/* ring base is programmed in units of 256 bytes */
4510 rb_addr = ring->gpu_addr >> 8;
4511 WREG32(mmCP_RB0_BASE, rb_addr);
4512 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4514 gfx_v8_0_set_cpg_door_bell(adev, ring);
4515 /* start the ring */
4516 amdgpu_ring_clear_ring(ring);
4517 gfx_v8_0_cp_gfx_start(adev);
4519 r = amdgpu_ring_test_ring(ring);
4521 ring->ready = false;
/* Enable or halt both compute micro engines (MEC ME1/ME2) via CP_MEC_CNTL.
 * When halting, all compute rings and the KIQ ring are marked not-ready.
 */
4526 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4531 WREG32(mmCP_MEC_CNTL, 0);
4533 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4534 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4535 adev->gfx.compute_ring[i].ready = false;
4536 adev->gfx.kiq.ring.ready = false;
/* Legacy (direct) microcode load for the compute engines: halt the MEC,
 * then stream the MEC1 ucode (and MEC2's, if a separate image exists)
 * through the respective CP_MEC_MEx_UCODE_ADDR/DATA register pairs.
 */
4541 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4543 const struct gfx_firmware_header_v1_0 *mec_hdr;
4544 const __le32 *fw_data;
4545 unsigned i, fw_size;
4547 if (!adev->gfx.mec_fw)
4550 gfx_v8_0_cp_compute_enable(adev, false);
4552 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4553 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4555 fw_data = (const __le32 *)
4556 (adev->gfx.mec_fw->data +
4557 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4558 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
/* MEC1 */
4561 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4562 for (i = 0; i < fw_size; i++)
4563 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4564 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4566 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4567 if (adev->gfx.mec2_fw) {
4568 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4570 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4571 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4573 fw_data = (const __le32 *)
4574 (adev->gfx.mec2_fw->data +
4575 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4576 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4578 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4579 for (i = 0; i < fw_size; i++)
4580 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4581 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
/* Tell the RLC which me/pipe/queue triple is the Kernel Interface Queue by
 * encoding it into RLC_CP_SCHEDULERS (me in bits 7:5, pipe 4:3, queue 2:0).
 * The register is written twice; presumably required by the programming
 * sequence for this part -- NOTE(review): confirm against RLC docs.
 */
4588 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4591 struct amdgpu_device *adev = ring->adev;
4593 /* tell RLC which is KIQ queue */
4594 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4596 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4597 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4599 WREG32(mmRLC_CP_SCHEDULERS, tmp);
/* Enable all kernel compute queues (KCQs) through the KIQ: build a queue
 * mask from the MEC queue bitmap, submit a SET_RESOURCES packet followed by
 * one MAP_QUEUES packet per compute ring, then poll a scratch register the
 * KIQ writes (0xDEADBEEF) to confirm the packets completed.
 */
4602 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4604 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4605 uint32_t scratch, tmp = 0;
4606 uint64_t queue_mask = 0;
4609 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4610 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4613 /* This situation may be hit in the future if a new HW
4614 * generation exposes more than 64 queues. If so, the
4615 * definition of queue_mask needs updating */
4616 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4617 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4621 queue_mask |= (1ull << i);
4624 r = amdgpu_gfx_scratch_get(adev, &scratch);
4626 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
/* sentinel: anything other than 0xDEADBEEF later means not done */
4629 WREG32(scratch, 0xCAFEDEAD);
/* 8 dwords of MAP_QUEUES per ring + SET_RESOURCES + scratch write */
4631 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4633 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4634 amdgpu_gfx_scratch_free(adev, scratch);
/* set resources */
4638 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4639 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4640 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4641 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4642 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4643 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4644 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4645 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4646 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4647 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4648 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4649 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4652 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4653 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4654 amdgpu_ring_write(kiq_ring,
4655 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4656 amdgpu_ring_write(kiq_ring,
4657 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4658 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4659 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4660 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4661 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4662 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4663 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4664 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4666 /* write to scratch for completion */
4667 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4668 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4669 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4670 amdgpu_ring_commit(kiq_ring);
/* busy-wait for the KIQ to retire the stream */
4672 for (i = 0; i < adev->usec_timeout; i++) {
4673 tmp = RREG32(scratch);
4674 if (tmp == 0xDEADBEEF)
4678 if (i >= adev->usec_timeout) {
4679 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4683 amdgpu_gfx_scratch_free(adev, scratch);
/* Deactivate the currently-selected HQD (callers select the queue via
 * vi_srbm_select() beforehand): issue a dequeue request of type @req, poll
 * CP_HQD_ACTIVE until the queue drains (bounded by usec_timeout), then clear
 * the dequeue request and the queue's read/write pointers.
 */
4688 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4692 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4693 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4694 for (i = 0; i < adev->usec_timeout; i++) {
4695 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
/* queue never went inactive: report failure to the caller */
4699 if (i == adev->usec_timeout)
4702 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4703 WREG32(mmCP_HQD_PQ_RPTR, 0);
4704 WREG32(mmCP_HQD_PQ_WPTR, 0);
/* Fill in the ring's Memory Queue Descriptor (vi_mqd) in host memory: static
 * header/thread-management fields, EOP buffer setup, MQD/HQD base addresses,
 * queue control, writeback addresses, doorbell setup and assorted HQD state
 * snapshots.  The MQD is later pushed to hardware by gfx_v8_0_mqd_commit()
 * or consumed by the KIQ via MAP_QUEUES.  Callers hold srbm_mutex with the
 * ring's me/pipe/queue selected, since several fields are seeded from the
 * currently-selected HQD registers.
 */
4709 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4711 struct amdgpu_device *adev = ring->adev;
4712 struct vi_mqd *mqd = ring->mqd_ptr;
4713 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4716 mqd->header = 0xC0310800;
4717 mqd->compute_pipelinestat_enable = 0x00000001;
/* enable all CUs on every shader engine for this queue */
4718 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4719 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4720 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4721 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4722 mqd->compute_misc_reserved = 0x00000003;
/* point HW at the dynamic CU mask stored inside the MQD allocation */
4723 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4724 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4725 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4726 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4727 eop_base_addr = ring->eop_gpu_addr >> 8;
4728 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4729 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4731 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4732 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4733 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4734 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4736 mqd->cp_hqd_eop_control = tmp;
4738 /* enable doorbell? */
4739 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4740 CP_HQD_PQ_DOORBELL_CONTROL,
4742 ring->use_doorbell ? 1 : 0);
4744 mqd->cp_hqd_pq_doorbell_control = tmp;
4746 /* set the pointer to the MQD */
4747 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4748 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4750 /* set MQD vmid to 0 */
4751 tmp = RREG32(mmCP_MQD_CONTROL);
4752 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4753 mqd->cp_mqd_control = tmp;
4755 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4756 hqd_gpu_addr = ring->gpu_addr >> 8;
4757 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4758 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4760 /* set up the HQD, this is similar to CP_RB0_CNTL */
4761 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4762 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4763 (order_base_2(ring->ring_size / 4) - 1));
4764 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4765 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4767 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4769 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4770 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4771 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4772 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4773 mqd->cp_hqd_pq_control = tmp;
4775 /* set the wb address whether it's enabled or not */
4776 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4777 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4778 mqd->cp_hqd_pq_rptr_report_addr_hi =
4779 upper_32_bits(wb_gpu_addr) & 0xffff;
4781 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4782 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4783 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4784 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4787 /* enable the doorbell if requested */
4788 if (ring->use_doorbell) {
4789 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4790 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4791 DOORBELL_OFFSET, ring->doorbell_index);
4793 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4795 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4796 DOORBELL_SOURCE, 0);
4797 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4801 mqd->cp_hqd_pq_doorbell_control = tmp;
4803 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4805 mqd->cp_hqd_pq_wptr = ring->wptr;
4806 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4808 /* set the vmid for the queue */
4809 mqd->cp_hqd_vmid = 0;
4811 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4812 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4813 mqd->cp_hqd_persistent_state = tmp;
/* MTYPE 3 for the IB control / IQ timer / context-save paths */
4816 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4817 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4818 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4819 mqd->cp_hqd_ib_control = tmp;
4821 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4822 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4823 mqd->cp_hqd_iq_timer = tmp;
4825 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4826 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4827 mqd->cp_hqd_ctx_save_control = tmp;
/* snapshot remaining HQD state from the currently-selected queue */
4830 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4831 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4832 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4833 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4834 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4835 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4836 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4837 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4838 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4839 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4840 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4841 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4842 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4843 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4844 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4846 /* activate the queue */
4847 mqd->cp_hqd_active = 1;
/* Push a host-side MQD into the hardware HQD registers of the currently
 * selected queue (callers hold srbm_mutex with the queue selected).  The
 * MQD fields are laid out to mirror the register block starting at
 * mmCP_MQD_BASE_ADDR, so the copy is a simple indexed loop; CP_HQD_ACTIVE
 * is written last so the queue only activates once fully programmed.
 */
4852 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4858 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4859 mqd_data = &mqd->cp_mqd_base_addr_lo;
4861 /* disable wptr polling */
4862 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4864 /* program all HQD registers */
4865 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4866 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4868 /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4869 * This is safe since EOP RPTR==WPTR for any inactive HQD
4870 * on ASICs that do not support context-save.
4871 * EOP writes/reads can start anywhere in the ring.
4873 if (adev->asic_type != CHIP_TONGA) {
4874 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4875 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4876 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4879 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4880 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4882 /* activate the HQD */
4883 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4884 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
/* Initialize the KIQ's queue.  On GPU reset, restore the MQD from the
 * backup copy and re-commit it; on first init, zero the MQD allocation,
 * build it via gfx_v8_0_mqd_init(), commit it, and save a backup so reset
 * can restore it later.  The KIQ uses the backup slot just past the
 * compute-ring slots (AMDGPU_MAX_COMPUTE_RINGS).
 */
4889 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4891 struct amdgpu_device *adev = ring->adev;
4892 struct vi_mqd *mqd = ring->mqd_ptr;
4893 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4895 gfx_v8_0_kiq_setting(ring);
4897 if (adev->in_gpu_reset) { /* for GPU_RESET case */
4898 /* reset MQD to a clean status */
4899 if (adev->gfx.mec.mqd_backup[mqd_idx])
4900 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4902 /* reset ring buffer */
4904 amdgpu_ring_clear_ring(ring);
4905 mutex_lock(&adev->srbm_mutex);
4906 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4907 gfx_v8_0_mqd_commit(adev, mqd);
4908 vi_srbm_select(adev, 0, 0, 0, 0);
4909 mutex_unlock(&adev->srbm_mutex);
/* first-time init: build a fresh MQD with all CUs/RBs enabled */
4911 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4912 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4913 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4914 mutex_lock(&adev->srbm_mutex);
4915 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4916 gfx_v8_0_mqd_init(ring);
4917 gfx_v8_0_mqd_commit(adev, mqd);
4918 vi_srbm_select(adev, 0, 0, 0, 0);
4919 mutex_unlock(&adev->srbm_mutex);
4921 if (adev->gfx.mec.mqd_backup[mqd_idx])
4922 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
/* Initialize a kernel compute queue's MQD.  Unlike the KIQ, the MQD is not
 * committed here -- the KIQ maps it later via MAP_QUEUES.  First init
 * builds and backs up the MQD; GPU reset restores it from backup and
 * clears the ring; resume just clears the ring.
 */
4928 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4930 struct amdgpu_device *adev = ring->adev;
4931 struct vi_mqd *mqd = ring->mqd_ptr;
/* backup slot = index of this ring within compute_ring[] */
4932 int mqd_idx = ring - &adev->gfx.compute_ring[0];
4934 if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
4935 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4936 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4937 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4938 mutex_lock(&adev->srbm_mutex);
4939 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4940 gfx_v8_0_mqd_init(ring);
4941 vi_srbm_select(adev, 0, 0, 0, 0);
4942 mutex_unlock(&adev->srbm_mutex);
4944 if (adev->gfx.mec.mqd_backup[mqd_idx])
4945 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4946 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4947 /* reset MQD to a clean status */
4948 if (adev->gfx.mec.mqd_backup[mqd_idx])
4949 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4950 /* reset ring buffer */
4952 amdgpu_ring_clear_ring(ring);
4954 amdgpu_ring_clear_ring(ring);
/* Program the MEC doorbell aperture (KIQ..MEC_RING7) on ASICs newer than
 * Tonga, then globally enable compute doorbells via CP_PQ_STATUS.
 */
4959 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4961 if (adev->asic_type > CHIP_TONGA) {
4962 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4963 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4965 /* enable doorbells */
4966 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
/* Bring up the compute path: enable the MEC, initialize the KIQ queue and
 * every KCQ MQD (mapping/unmapping each ring's MQD BO around the init),
 * program the MEC doorbell range, have the KIQ map all KCQs, then ring-test
 * the KIQ and each compute ring.
 */
4969 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4971 struct amdgpu_ring *ring = NULL;
4974 gfx_v8_0_cp_compute_enable(adev, true);
4976 ring = &adev->gfx.kiq.ring;
4978 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4979 if (unlikely(r != 0))
4982 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4984 r = gfx_v8_0_kiq_init_queue(ring);
4985 amdgpu_bo_kunmap(ring->mqd_obj);
/* mqd_ptr is only valid while the BO is mapped */
4986 ring->mqd_ptr = NULL;
4988 amdgpu_bo_unreserve(ring->mqd_obj);
4992 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4993 ring = &adev->gfx.compute_ring[i];
4995 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4996 if (unlikely(r != 0))
4998 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
5000 r = gfx_v8_0_kcq_init_queue(ring);
5001 amdgpu_bo_kunmap(ring->mqd_obj);
5002 ring->mqd_ptr = NULL;
5004 amdgpu_bo_unreserve(ring->mqd_obj);
5009 gfx_v8_0_set_mec_doorbell_range(adev);
5011 r = gfx_v8_0_kiq_kcq_enable(adev);
/* verify the KIQ itself, then every mapped compute ring */
5016 ring = &adev->gfx.kiq.ring;
5018 r = amdgpu_ring_test_ring(ring);
5020 ring->ready = false;
5025 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5026 ring = &adev->gfx.compute_ring[i];
5028 r = amdgpu_ring_test_ring(ring);
5030 ring->ready = false;
/* Resume the whole command processor: optionally load microcode directly
 * (legacy path), then bring up the gfx ring and the KIQ/compute rings.
 * GUI-idle interrupts are masked during the sequence on dGPUs.
 */
5037 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5041 if (!(adev->flags & AMD_IS_APU))
5042 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5044 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
5045 /* legacy firmware loading */
5046 r = gfx_v8_0_cp_gfx_load_microcode(adev);
5050 r = gfx_v8_0_cp_compute_load_microcode(adev);
5055 r = gfx_v8_0_cp_gfx_resume(adev);
5059 r = gfx_v8_0_kiq_resume(adev);
5063 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/* Enable/halt both CP halves (gfx and compute) together. */
5068 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5070 gfx_v8_0_cp_gfx_enable(adev, enable);
5071 gfx_v8_0_cp_compute_enable(adev, enable);
/* IP-block hw_init hook: apply golden register settings, run global GPU
 * init, then resume the RLC followed by the CP.
 */
5074 static int gfx_v8_0_hw_init(void *handle)
5077 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5079 gfx_v8_0_init_golden_registers(adev);
5080 gfx_v8_0_gpu_init(adev);
5082 r = gfx_v8_0_rlc_resume(adev);
5086 r = gfx_v8_0_cp_resume(adev);
/* Disable a single kernel compute queue through the KIQ: submit an
 * UNMAP_QUEUES packet (action=RESET_QUEUES) addressed by the ring's
 * doorbell, followed by a scratch-register write the driver then polls
 * (same completion handshake as gfx_v8_0_kiq_kcq_enable()).
 */
5091 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
5093 struct amdgpu_device *adev = kiq_ring->adev;
5094 uint32_t scratch, tmp = 0;
5097 r = amdgpu_gfx_scratch_get(adev, &scratch);
5099 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
5102 WREG32(scratch, 0xCAFEDEAD);
5104 r = amdgpu_ring_alloc(kiq_ring, 10);
5106 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
5107 amdgpu_gfx_scratch_free(adev, scratch);
/* unmap queues */
5112 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5113 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
5114 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5115 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5116 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5117 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5118 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5119 amdgpu_ring_write(kiq_ring, 0);
5120 amdgpu_ring_write(kiq_ring, 0);
5121 amdgpu_ring_write(kiq_ring, 0);
5122 /* write to scratch for completion */
5123 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
5124 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
5125 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
5126 amdgpu_ring_commit(kiq_ring);
/* poll the completion sentinel, bounded by usec_timeout */
5128 for (i = 0; i < adev->usec_timeout; i++) {
5129 tmp = RREG32(scratch);
5130 if (tmp == 0xDEADBEEF)
5134 if (i >= adev->usec_timeout) {
5135 DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
5138 amdgpu_gfx_scratch_free(adev, scratch);
/* IP-block hw_fini hook: release GFX interrupt sources, unmap every KCQ
 * through the KIQ (so CPC stops touching soon-to-be-freed memory), then --
 * on bare metal only -- halt the CP and RLC and ungate GFX powergating.
 * SR-IOV guests skip the hardware teardown.
 */
5142 static int gfx_v8_0_hw_fini(void *handle)
5144 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5147 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5148 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5150 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
5152 amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
5154 /* disable KCQ to avoid CPC touch memory not valid anymore */
5155 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5156 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
5158 if (amdgpu_sriov_vf(adev)) {
5159 pr_debug("For SRIOV client, shouldn't do anything.\n");
5162 gfx_v8_0_cp_enable(adev, false);
5163 gfx_v8_0_rlc_stop(adev);
5165 amdgpu_device_ip_set_powergating_state(adev,
5166 AMD_IP_BLOCK_TYPE_GFX,
5167 AMD_PG_STATE_UNGATE);
/* IP-block suspend hook: flag in_suspend (consulted by the KCQ init path)
 * and tear the block down via hw_fini.
 */
5172 static int gfx_v8_0_suspend(void *handle)
5174 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5175 adev->gfx.in_suspend = true;
5176 return gfx_v8_0_hw_fini(adev);
/* IP-block resume hook: re-run hw_init and clear the in_suspend flag. */
5179 static int gfx_v8_0_resume(void *handle)
5182 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5184 r = gfx_v8_0_hw_init(adev);
5185 adev->gfx.in_suspend = false;
/* IP-block is_idle hook: the GFX block is idle when GRBM_STATUS.GUI_ACTIVE
 * is clear.
 */
5189 static bool gfx_v8_0_is_idle(void *handle)
5191 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5193 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
/* IP-block wait_for_idle hook: poll gfx_v8_0_is_idle() up to usec_timeout
 * iterations.
 */
5199 static int gfx_v8_0_wait_for_idle(void *handle)
5202 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5204 for (i = 0; i < adev->usec_timeout; i++) {
5205 if (gfx_v8_0_is_idle(handle))
/* IP-block check_soft_reset hook: inspect GRBM_STATUS/GRBM_STATUS2/
 * SRBM_STATUS busy bits and accumulate the GRBM/SRBM soft-reset masks a
 * later gfx_v8_0_soft_reset() would need.  The masks are stashed in
 * adev->gfx.{grbm,srbm}_soft_reset for the pre/soft/post reset hooks.
 */
5213 static bool gfx_v8_0_check_soft_reset(void *handle)
5215 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5216 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
/* GRBM_STATUS: any busy pipeline stage implies CP+GFX (and GRBM) reset */
5220 tmp = RREG32(mmGRBM_STATUS);
5221 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5222 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5223 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5224 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5225 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5226 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5227 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5228 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5229 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5230 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5231 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5232 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5233 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
/* GRBM_STATUS2: RLC and CP front-end (CPF/CPC/CPG) busy bits */
5237 tmp = RREG32(mmGRBM_STATUS2);
5238 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5239 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5240 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5242 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5243 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5244 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5245 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5247 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5249 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5251 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5252 SOFT_RESET_GRBM, 1);
/* SRBM_STATUS: pending GRBM requests or busy semaphore block */
5256 tmp = RREG32(mmSRBM_STATUS);
5257 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5258 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5259 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5260 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5261 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5262 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5264 if (grbm_soft_reset || srbm_soft_reset) {
5265 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5266 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5269 adev->gfx.grbm_soft_reset = 0;
5270 adev->gfx.srbm_soft_reset = 0;
/* IP-block pre_soft_reset hook: quiesce the hardware before the reset bits
 * are pulsed.  Stops the RLC, halts the gfx CP when CP/GFX reset is pending,
 * and -- when any CP reset is pending -- drains every compute HQD
 * (dequeue request type 2) and halts the MEC.
 */
5275 static int gfx_v8_0_pre_soft_reset(void *handle)
5277 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5278 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
/* nothing to do when check_soft_reset found no pending reset */
5280 if ((!adev->gfx.grbm_soft_reset) &&
5281 (!adev->gfx.srbm_soft_reset))
5284 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5285 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5288 gfx_v8_0_rlc_stop(adev);
5290 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5291 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5292 /* Disable GFX parsing/prefetching */
5293 gfx_v8_0_cp_gfx_enable(adev, false);
5295 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5296 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5297 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5298 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5301 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5302 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5304 mutex_lock(&adev->srbm_mutex);
5305 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5306 gfx_v8_0_deactivate_hqd(adev, 2);
5307 vi_srbm_select(adev, 0, 0, 0, 0);
5308 mutex_unlock(&adev->srbm_mutex);
5310 /* Disable MEC parsing/prefetching */
5311 gfx_v8_0_cp_compute_enable(adev, false);
/* IP-block soft_reset hook: pulse the GRBM/SRBM soft-reset bits recorded by
 * check_soft_reset.  GMCON stall/clear is asserted around the pulses; each
 * reset register is written with the mask set, read back, then written with
 * the mask cleared (the read-backs post the writes).
 */
5317 static int gfx_v8_0_soft_reset(void *handle)
5319 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5320 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5323 if ((!adev->gfx.grbm_soft_reset) &&
5324 (!adev->gfx.srbm_soft_reset))
5327 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5328 srbm_soft_reset = adev->gfx.srbm_soft_reset;
/* stall GFX memory traffic while the reset bits are pulsed */
5330 if (grbm_soft_reset || srbm_soft_reset) {
5331 tmp = RREG32(mmGMCON_DEBUG);
5332 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5333 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5334 WREG32(mmGMCON_DEBUG, tmp);
5338 if (grbm_soft_reset) {
5339 tmp = RREG32(mmGRBM_SOFT_RESET);
5340 tmp |= grbm_soft_reset;
5341 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5342 WREG32(mmGRBM_SOFT_RESET, tmp);
5343 tmp = RREG32(mmGRBM_SOFT_RESET);
/* de-assert */
5347 tmp &= ~grbm_soft_reset;
5348 WREG32(mmGRBM_SOFT_RESET, tmp);
5349 tmp = RREG32(mmGRBM_SOFT_RESET);
5352 if (srbm_soft_reset) {
5353 tmp = RREG32(mmSRBM_SOFT_RESET);
5354 tmp |= srbm_soft_reset;
5355 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5356 WREG32(mmSRBM_SOFT_RESET, tmp);
5357 tmp = RREG32(mmSRBM_SOFT_RESET);
5361 tmp &= ~srbm_soft_reset;
5362 WREG32(mmSRBM_SOFT_RESET, tmp);
5363 tmp = RREG32(mmSRBM_SOFT_RESET);
/* release the GMCON stall */
5366 if (grbm_soft_reset || srbm_soft_reset) {
5367 tmp = RREG32(mmGMCON_DEBUG);
5368 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5369 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5370 WREG32(mmGMCON_DEBUG, tmp);
5373 /* Wait a little for things to settle down */
/* IP-block post_soft_reset hook: bring the engines back after the reset
 * pulse.  Resumes the gfx CP when CP/GFX was reset; when any CP piece was
 * reset, drains compute HQDs again and resumes the KIQ/compute path; the
 * RLC is restarted last.
 */
5379 static int gfx_v8_0_post_soft_reset(void *handle)
5381 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5382 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5384 if ((!adev->gfx.grbm_soft_reset) &&
5385 (!adev->gfx.srbm_soft_reset))
5388 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5389 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5391 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5392 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5393 gfx_v8_0_cp_gfx_resume(adev);
5395 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5396 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5397 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5398 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5401 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5402 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5404 mutex_lock(&adev->srbm_mutex);
5405 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5406 gfx_v8_0_deactivate_hqd(adev, 2);
5407 vi_srbm_select(adev, 0, 0, 0, 0);
5408 mutex_unlock(&adev->srbm_mutex);
5410 gfx_v8_0_kiq_resume(adev);
5412 gfx_v8_0_rlc_start(adev);
5418  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5420  * @adev: amdgpu_device pointer
5422  * Fetches a GPU clock counter snapshot.
5423  * Returns the 64 bit clock counter snapshot.
5425 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
/* gpu_clock_mutex serializes the capture-then-read pair below so the
 * LSB/MSB halves come from the same latched snapshot */
5429 mutex_lock(&adev->gfx.gpu_clock_mutex);
5430 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5431 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5432 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5433 mutex_unlock(&adev->gfx.gpu_clock_mutex);
/*
 * gfx_v8_0_ring_emit_gds_switch() - program per-VMID GDS/GWS/OA partitions.
 * Emits four WRITE_DATA packets that set the GDS memory base/size, the GWS
 * slice, and the OA mask registers for @vmid.  All base/size arguments arrive
 * in bytes/units and are converted to register granularity by the shifts.
 */
5437 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5439 uint32_t gds_base, uint32_t gds_size,
5440 uint32_t gws_base, uint32_t gws_size,
5441 uint32_t oa_base, uint32_t oa_size)
/* convert to hardware register granularity */
5443 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5444 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5446 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5447 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5449 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5450 oa_size = oa_size >> AMDGPU_OA_SHIFT;
/* GDS memory base for this VMID */
5453 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5454 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5455 WRITE_DATA_DST_SEL(0)));
5456 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5457 amdgpu_ring_write(ring, 0);
5458 amdgpu_ring_write(ring, gds_base);
/* GDS memory size for this VMID */
5461 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5462 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5463 WRITE_DATA_DST_SEL(0)));
5464 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5465 amdgpu_ring_write(ring, 0);
5466 amdgpu_ring_write(ring, gds_size);
/* GWS: size and base packed into one register value */
5469 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5470 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5471 WRITE_DATA_DST_SEL(0)));
5472 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5473 amdgpu_ring_write(ring, 0);
5474 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
/* OA: build a contiguous bitmask of oa_size bits starting at oa_base */
5477 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5478 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5479 WRITE_DATA_DST_SEL(0)));
5480 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5481 amdgpu_ring_write(ring, 0);
5482 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
/*
 * wave_read_ind() - read one indirect SQ wave register.
 * Programs SQ_IND_INDEX to select (simd, wave, address) with FORCE_READ,
 * then returns the value latched in SQ_IND_DATA.  Caller is expected to
 * hold whatever serialization the debugfs wave-read path uses.
 */
5485 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5487 WREG32(mmSQ_IND_INDEX,
5488 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5489 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5490 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5491 (SQ_IND_INDEX__FORCE_READ_MASK));
5492 return RREG32(mmSQ_IND_DATA);
/*
 * wave_read_regs() - bulk-read @num consecutive indirect SQ wave registers
 * starting at @regno for (simd, wave, thread) into @out.  AUTO_INCR makes
 * the hardware advance the index after each SQ_IND_DATA read, so the data
 * register is simply read repeatedly.
 * NOTE(review): the loop header driving the repeated read is missing from
 * this listing (original line 5506); presumably `while (num--)`.
 */
5495 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5496 uint32_t wave, uint32_t thread,
5497 uint32_t regno, uint32_t num, uint32_t *out)
5499 WREG32(mmSQ_IND_INDEX,
5500 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5501 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5502 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5503 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5504 (SQ_IND_INDEX__FORCE_READ_MASK) |
5505 (SQ_IND_INDEX__AUTO_INCR_MASK));
5507 *(out++) = RREG32(mmSQ_IND_DATA);
/*
 * gfx_v8_0_read_wave_data() - snapshot one wave's state registers for the
 * umr/debugfs wave-status interface.  Appends fields to @dst, bumping
 * *no_fields for each one; the leading 0 is the "type 0" record marker.
 * Field order is part of the userspace-visible record layout - do not reorder.
 */
5510 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5512 /* type 0 wave data */
5513 dst[(*no_fields)++] = 0;
5514 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5515 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5516 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5517 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5518 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5519 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5520 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5521 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5522 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5523 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5524 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5525 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5526 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5527 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5528 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5529 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5530 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5531 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
/*
 * gfx_v8_0_read_wave_sgprs() - read @size SGPRs of one wave into @dst,
 * starting at SGPR index @start, via the auto-increment bulk reader.
 * Thread id 0 is used since SGPRs are per-wave, not per-lane.
 * NOTE(review): the `wave_read_regs(` call line itself (orig 5538) is
 * missing from this listing; only its argument lines are visible.
 */
5534 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5535 uint32_t wave, uint32_t start,
5536 uint32_t size, uint32_t *dst)
5539 adev, simd, wave, 0,
5540 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
/* GFX IP callback table wired into adev->gfx.funcs by early_init. */
5544 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5545 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5546 .select_se_sh = &gfx_v8_0_select_se_sh,
5547 .read_wave_data = &gfx_v8_0_read_wave_data,
5548 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5549 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
/*
 * gfx_v8_0_early_init() - amd_ip_funcs .early_init hook.
 * Sets ring counts and installs the gfx/ring/irq/gds/rlc function tables
 * before any hardware or software state is touched.
 */
5552 static int gfx_v8_0_early_init(void *handle)
5554 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5556 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5557 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5558 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5559 gfx_v8_0_set_ring_funcs(adev);
5560 gfx_v8_0_set_irq_funcs(adev);
5561 gfx_v8_0_set_gds_init(adev);
5562 gfx_v8_0_set_rlc_funcs(adev);
/*
 * gfx_v8_0_late_init() - amd_ip_funcs .late_init hook.
 * Enables the priv-reg/priv-inst/ECC/SQ interrupt sources, runs the EDC GPR
 * workaround (needs the IB pool, hence late init), and ungates GFX power.
 * NOTE(review): the error-return checks between calls are missing from this
 * listing; each amdgpu_irq_get() result is presumably checked.
 */
5567 static int gfx_v8_0_late_init(void *handle)
5569 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5572 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5576 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5580 /* requires IBs so do in late init after IB pool is initialized */
5581 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5585 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5587 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5591 r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5594 "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
/* start with power gating disabled (UNGATE) until DPM decides otherwise */
5599 amdgpu_device_ip_set_powergating_state(adev,
5600 AMD_IP_BLOCK_TYPE_GFX,
/*
 * Enable/disable static per-CU (medium-grain) GFX power gating.
 * Polaris11/12/VegaM route the request through the SMU via powerplay;
 * all paths also toggle STATIC_PER_CU_PG_ENABLE in RLC_PG_CNTL directly.
 */
5606 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5609 if ((adev->asic_type == CHIP_POLARIS11) ||
5610 (adev->asic_type == CHIP_POLARIS12) ||
5611 (adev->asic_type == CHIP_VEGAM))
5612 /* Send msg to SMU via Powerplay */
5613 amdgpu_device_ip_set_powergating_state(adev,
5614 AMD_IP_BLOCK_TYPE_SMC,
5616 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5618 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
/* Enable/disable dynamic per-CU (medium-grain) GFX power gating. */
5621 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5624 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
/* Polaris11-family: enable/disable "quick" medium-grain power gating. */
5627 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5630 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
/* Carrizo/Stoney: enable/disable coarse-grain GFX power gating. */
5633 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5636 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
/* Carrizo/Stoney: enable/disable GFX pipeline power gating; the dummy
 * register read afterwards wakes GFX so subsequent accesses succeed. */
5639 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5642 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5644 /* Read any GFX register to wake up GFX. */
5646 RREG32(mmDB_RENDER_CONTROL);
/*
 * Carrizo/Stoney: apply coarse-grain + pipeline PG as a pair.  Gating is
 * only enabled when the GFX_PG feature flag is set AND the caller asked to
 * enable; pipeline PG additionally requires its own feature flag.
 */
5649 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5652 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5653 cz_enable_gfx_cg_power_gating(adev, true);
5654 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5655 cz_enable_gfx_pipeline_power_gating(adev, true);
5657 cz_enable_gfx_cg_power_gating(adev, false);
5658 cz_enable_gfx_pipeline_power_gating(adev, false);
/*
 * gfx_v8_0_set_powergating_state() - amd_ip_funcs .set_powergating_state.
 * Dispatches per-ASIC power-gating programming (SCK slow-down, CP PG,
 * static/dynamic/quick MG PG) according to adev->pg_flags.  No-op under
 * SR-IOV since the host owns PG.  NOTE(review): case labels for the
 * Carrizo/Stoney arm and the break/default lines are missing from this
 * listing; the visible statements are documented as-is.
 */
5662 static int gfx_v8_0_set_powergating_state(void *handle,
5663 enum amd_powergating_state state)
5665 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5666 bool enable = (state == AMD_PG_STATE_GATE);
5668 if (amdgpu_sriov_vf(adev))
5671 switch (adev->asic_type) {
/* APU path: SCK slow-down + CP power gating + CG/pipeline PG */
5675 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5676 cz_enable_sck_slow_down_on_power_up(adev, true);
5677 cz_enable_sck_slow_down_on_power_down(adev, true);
5679 cz_enable_sck_slow_down_on_power_up(adev, false);
5680 cz_enable_sck_slow_down_on_power_down(adev, false);
5682 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5683 cz_enable_cp_power_gating(adev, true);
5685 cz_enable_cp_power_gating(adev, false);
5687 cz_update_gfx_cg_power_gating(adev, enable);
5689 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5690 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5692 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5694 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5695 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5697 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
/* Polaris path: static/dynamic/quick MG PG only */
5699 case CHIP_POLARIS11:
5700 case CHIP_POLARIS12:
5702 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5703 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5705 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5707 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5708 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5710 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5712 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5713 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5715 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
/*
 * gfx_v8_0_get_clockgating_state() - report currently-active CG features.
 * Reads back the CG control registers and ORs the corresponding
 * AMD_CG_SUPPORT_GFX_* bits into *flags.  Skipped under SR-IOV (registers
 * are owned by the host).
 */
5724 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5726 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5729 if (amdgpu_sriov_vf(adev))
5732 /* AMD_CG_SUPPORT_GFX_MGCG */
5733 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5734 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5735 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5737 /* AMD_CG_SUPPORT_GFX_CGCG */
5738 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5739 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5740 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5742 /* AMD_CG_SUPPORT_GFX_CGLS */
5743 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5744 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5746 /* AMD_CG_SUPPORT_GFX_CGTS */
5747 data = RREG32(mmCGTS_SM_CTRL_REG);
5748 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5749 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5751 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5752 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5753 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5755 /* AMD_CG_SUPPORT_GFX_RLC_LS (implies MGLS) */
5756 data = RREG32(mmRLC_MEM_SLP_CNTL);
5757 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5758 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5760 /* AMD_CG_SUPPORT_GFX_CP_LS (implies MGLS) */
5761 data = RREG32(mmCP_MEM_SLP_CNTL);
5762 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5763 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
/*
 * gfx_v8_0_send_serdes_cmd() - broadcast a BPM serdes command to all CUs.
 * Selects all SEs/SHs, addresses every CU/non-CU master, clears the
 * command/select bits in RLC_SERDES_WR_CTRL (Stoney lacks the BPM_DATA and
 * REG_ADDR fields, hence the smaller clear mask), then writes the new
 * cmd/reg_addr with broadcast BPM addressing.
 */
5766 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5767 uint32_t reg_addr, uint32_t cmd)
5771 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5773 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5774 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5776 data = RREG32(mmRLC_SERDES_WR_CTRL);
5777 if (adev->asic_type == CHIP_STONEY)
5778 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5779 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5780 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5781 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5782 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5783 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5784 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5785 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5786 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5788 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5789 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5790 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5791 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5792 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5793 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5794 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5795 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5796 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5797 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5798 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
/* 0xff BPM address = broadcast to all BPMs */
5799 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5800 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5801 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5802 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5804 WREG32(mmRLC_SERDES_WR_CTRL, data);
5807 #define MSG_ENTER_RLC_SAFE_MODE 1
5808 #define MSG_EXIT_RLC_SAFE_MODE 0
5809 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5810 #define RLC_GPR_REG2__REQ__SHIFT 0
5811 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5812 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
/*
 * iceland_enter_rlc_safe_mode() - request RLC safe mode before touching
 * CG/PG registers.  No-op when the RLC firmware is not running.  Writes
 * CMD+MESSAGE(1) to RLC_SAFE_MODE, then polls (bounded by usec_timeout)
 * for GFX clock/power status and for the CMD bit to self-clear.
 * NOTE(review): udelay/break lines inside the poll loops are missing from
 * this listing.
 */
5814 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5819 data = RREG32(mmRLC_CNTL);
5820 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
/* only needed when CGCG/MGCG is in play */
5823 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5824 data |= RLC_SAFE_MODE__CMD_MASK;
5825 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5826 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5827 WREG32(mmRLC_SAFE_MODE, data);
/* wait until both GFX clock and power report "on" */
5829 for (i = 0; i < adev->usec_timeout; i++) {
5830 if ((RREG32(mmRLC_GPM_STAT) &
5831 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5832 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5833 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5834 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
/* wait for the RLC to acknowledge (CMD bit clears) */
5839 for (i = 0; i < adev->usec_timeout; i++) {
5840 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5844 adev->gfx.rlc.in_safe_mode = true;
/*
 * iceland_exit_rlc_safe_mode() - release RLC safe mode acquired by
 * iceland_enter_rlc_safe_mode().  Writes CMD with MESSAGE cleared (exit)
 * and polls for the CMD bit to self-clear.  No-op when the RLC firmware
 * is not running or safe mode was never entered.
 */
5848 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5853 data = RREG32(mmRLC_CNTL);
5854 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5857 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5858 if (adev->gfx.rlc.in_safe_mode) {
5859 data |= RLC_SAFE_MODE__CMD_MASK;
5860 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5861 WREG32(mmRLC_SAFE_MODE, data);
5862 adev->gfx.rlc.in_safe_mode = false;
/* wait for the RLC to acknowledge the exit request */
5866 for (i = 0; i < adev->usec_timeout; i++) {
5867 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
/* RLC safe-mode callbacks installed by gfx_v8_0_set_rlc_funcs(). */
5873 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5874 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5875 .exit_safe_mode = iceland_exit_rlc_safe_mode
/*
 * gfx_v8_0_update_medium_grain_clock_gating() - program MGCG/MGLS/CGTS.
 * The numbered steps mirror the hardware programming guide; the whole
 * sequence runs under RLC safe mode and the register write order must be
 * preserved exactly.  WREG32s of unchanged values are skipped by comparing
 * temp vs data (the guard lines are partly missing from this listing).
 */
5878 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5881 uint32_t temp, data;
5883 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5885 /* It is disabled by HW by default */
5886 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5887 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5888 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5889 /* 1 - RLC memory Light sleep */
5890 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
/* 2 - CP memory light sleep */
5892 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5893 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5896 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5897 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
/* APUs keep the GRBM override set; dGPUs clear it too */
5898 if (adev->flags & AMD_IS_APU)
5899 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5900 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5901 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5903 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5904 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5905 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5906 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5909 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5911 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5912 gfx_v8_0_wait_for_rlc_serdes(adev);
5914 /* 5 - clear mgcg override */
5915 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5917 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5918 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5919 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5920 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5921 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5922 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5923 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5924 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5925 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5926 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5927 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5928 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5930 WREG32(mmCGTS_SM_CTRL_REG, data);
5934 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5935 gfx_v8_0_wait_for_rlc_serdes(adev);
/* disable path: undo the enable sequence in reverse register order */
5937 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5938 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5939 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5940 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5941 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5942 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5944 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5946 /* 2 - disable MGLS in RLC */
5947 data = RREG32(mmRLC_MEM_SLP_CNTL);
5948 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5949 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5950 WREG32(mmRLC_MEM_SLP_CNTL, data);
5953 /* 3 - disable MGLS in CP */
5954 data = RREG32(mmCP_MEM_SLP_CNTL);
5955 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5956 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5957 WREG32(mmCP_MEM_SLP_CNTL, data);
5960 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5961 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5962 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5963 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5965 WREG32(mmCGTS_SM_CTRL_REG, data);
5967 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5968 gfx_v8_0_wait_for_rlc_serdes(adev);
5970 /* 6 - set mgcg override */
5971 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5975 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5976 gfx_v8_0_wait_for_rlc_serdes(adev);
5979 adev->gfx.rlc.funcs->exit_safe_mode(adev);
/*
 * gfx_v8_0_update_coarse_grain_clock_gating() - program CGCG/CGLS.
 * Runs under RLC safe mode.  Enable path clears the CGCG/CGLS overrides,
 * issues the serdes commands, then sets the enable bits; disable path sets
 * the overrides, wakes CGCG with dummy reads, and clears the enables.
 * Register write order is part of the hardware sequence - do not reorder.
 */
5982 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5985 uint32_t temp, temp1, data, data1;
5987 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5989 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5991 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
/* 1 - clear the CGCG override */
5992 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5993 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5995 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5997 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5998 gfx_v8_0_wait_for_rlc_serdes(adev);
6000 /* 2 - clear cgcg override */
6001 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6003 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6004 gfx_v8_0_wait_for_rlc_serdes(adev);
6006 /* 3 - write cmd to set CGLS */
6007 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
6009 /* 4 - enable cgcg */
6010 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
6012 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
/* enable cgls + clear its override too */
6014 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6016 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6017 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
6020 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6022 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6026 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6028 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
6029 * Cmp_busy/GFX_Idle interrupts
6031 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/* ---- disable path ---- */
6033 /* disable cntx_empty_int_enable & GFX Idle interrupt */
6034 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
/* set CGCG/CGLS overrides */
6037 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6038 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
6039 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
6041 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6043 /* read gfx register to wake up cgcg */
6044 RREG32(mmCB_CGTT_SCLK_CTRL);
6045 RREG32(mmCB_CGTT_SCLK_CTRL);
6046 RREG32(mmCB_CGTT_SCLK_CTRL);
6047 RREG32(mmCB_CGTT_SCLK_CTRL);
6049 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6050 gfx_v8_0_wait_for_rlc_serdes(adev);
6052 /* write cmd to Set CGCG Overrride */
6053 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6055 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6056 gfx_v8_0_wait_for_rlc_serdes(adev);
6058 /* write cmd to Clear CGLS */
6059 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6061 /* disable cgcg, cgls should be disabled too. */
6062 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
6063 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6065 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6066 /* enable interrupts again for PG */
6067 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6070 gfx_v8_0_wait_for_rlc_serdes(adev);
6072 adev->gfx.rlc.funcs->exit_safe_mode(adev);
/*
 * gfx_v8_0_update_gfx_clock_gating() - apply MGCG and CGCG in the required
 * order: medium-grain first when enabling, coarse-grain first when
 * disabling (CGCG must never be active without MGCG).
 */
6074 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6078 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6079 * === MGCG + MGLS + TS(CG/LS) ===
6081 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6082 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6084 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6085 * === CGCG + CGLS ===
6087 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6088 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
/*
 * gfx_v8_0_tonga_update_gfx_clock_gating() - Tonga routes CG state through
 * the SMU: build a PP_CG_MSG_ID for the CGCG/CGLS group and the MGCG/MGLS
 * group and send each via set_clockgating_by_smu.  UNGATE zeroes pp_state
 * before sending.  NOTE(review): the `pp_state = 0;` lines after the
 * UNGATE checks are missing from this listing.
 */
6093 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6094 enum amd_clockgating_state state)
6096 uint32_t msg_id, pp_state = 0;
6097 uint32_t pp_support_state = 0;
6099 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6100 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6101 pp_support_state = PP_STATE_SUPPORT_LS;
6102 pp_state = PP_STATE_LS;
6104 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6105 pp_support_state |= PP_STATE_SUPPORT_CG;
6106 pp_state |= PP_STATE_CG;
6108 if (state == AMD_CG_STATE_UNGATE)
6111 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6115 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6116 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6119 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6120 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6121 pp_support_state = PP_STATE_SUPPORT_LS;
6122 pp_state = PP_STATE_LS;
6125 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6126 pp_support_state |= PP_STATE_SUPPORT_CG;
6127 pp_state |= PP_STATE_CG;
6130 if (state == AMD_CG_STATE_UNGATE)
6133 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6137 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6138 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
/*
 * gfx_v8_0_polaris_update_gfx_clock_gating() - Polaris variant of the
 * SMU-routed CG update.  Same pattern as the Tonga version but with three
 * extra groups: 3D CGCG/CGLS, RLC light sleep, and CP light sleep.
 * NOTE(review): `pp_state = 0;` after each UNGATE check is missing from
 * this listing.
 */
6144 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6145 enum amd_clockgating_state state)
6148 uint32_t msg_id, pp_state = 0;
6149 uint32_t pp_support_state = 0;
/* group: GFX CGCG/CGLS */
6151 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6152 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6153 pp_support_state = PP_STATE_SUPPORT_LS;
6154 pp_state = PP_STATE_LS;
6156 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6157 pp_support_state |= PP_STATE_SUPPORT_CG;
6158 pp_state |= PP_STATE_CG;
6160 if (state == AMD_CG_STATE_UNGATE)
6163 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6167 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6168 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
/* group: GFX 3D CGCG/CGLS */
6171 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6172 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6173 pp_support_state = PP_STATE_SUPPORT_LS;
6174 pp_state = PP_STATE_LS;
6176 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6177 pp_support_state |= PP_STATE_SUPPORT_CG;
6178 pp_state |= PP_STATE_CG;
6180 if (state == AMD_CG_STATE_UNGATE)
6183 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6187 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6188 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
/* group: MGCG/MGLS */
6191 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6192 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6193 pp_support_state = PP_STATE_SUPPORT_LS;
6194 pp_state = PP_STATE_LS;
6197 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6198 pp_support_state |= PP_STATE_SUPPORT_CG;
6199 pp_state |= PP_STATE_CG;
6202 if (state == AMD_CG_STATE_UNGATE)
6205 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6209 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6210 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
/* group: RLC light sleep */
6213 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6214 pp_support_state = PP_STATE_SUPPORT_LS;
6216 if (state == AMD_CG_STATE_UNGATE)
6219 pp_state = PP_STATE_LS;
6221 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6225 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6226 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
/* group: CP light sleep */
6229 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6230 pp_support_state = PP_STATE_SUPPORT_LS;
6232 if (state == AMD_CG_STATE_UNGATE)
6235 pp_state = PP_STATE_LS;
6236 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6240 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6241 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
/*
 * gfx_v8_0_set_clockgating_state() - amd_ip_funcs .set_clockgating_state.
 * Dispatches per-ASIC: direct register programming for the older chips,
 * SMU-message path for Tonga and the Polaris family.  No-op under SR-IOV.
 */
6247 static int gfx_v8_0_set_clockgating_state(void *handle,
6248 enum amd_clockgating_state state)
6250 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6252 if (amdgpu_sriov_vf(adev))
6255 switch (adev->asic_type) {
6259 gfx_v8_0_update_gfx_clock_gating(adev,
6260 state == AMD_CG_STATE_GATE);
6263 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6265 case CHIP_POLARIS10:
6266 case CHIP_POLARIS11:
6267 case CHIP_POLARIS12:
6269 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
/* Read pointer comes from the ring's writeback slot (updated by the CP). */
6277 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6279 return ring->adev->wb.wb[ring->rptr_offs];
/* GFX write pointer: writeback slot when doorbells are in use, otherwise
 * read straight from the CP_RB0_WPTR register. */
6282 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6284 struct amdgpu_device *adev = ring->adev;
6286 if (ring->use_doorbell)
6287 /* XXX check if swapping is necessary on BE */
6288 return ring->adev->wb.wb[ring->wptr_offs];
6290 return RREG32(mmCP_RB0_WPTR);
/* Publish the GFX write pointer: via writeback+doorbell when available,
 * otherwise by an MMIO write (the read-back flushes the posted write). */
6293 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6295 struct amdgpu_device *adev = ring->adev;
6297 if (ring->use_doorbell) {
6298 /* XXX check if swapping is necessary on BE */
6299 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6300 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6302 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6303 (void)RREG32(mmCP_RB0_WPTR);
/*
 * gfx_v8_0_ring_emit_hdp_flush() - emit a WAIT_REG_MEM that requests an HDP
 * flush and waits for completion.  Compute/KIQ rings select a per-pipe CP2+
 * done bit and the ME engine; the gfx ring uses CP0 and the PFP engine.
 * NOTE(review): the ring->me==1 branch lines selecting CP2 vs CP6 are only
 * partially visible in this listing.
 */
6307 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6309 u32 ref_and_mask, reg_mem_engine;
6311 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6312 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6315 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6318 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6325 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6326 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6329 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6330 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6331 WAIT_REG_MEM_FUNCTION(3) | /* == */
6333 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6334 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6335 amdgpu_ring_write(ring, ref_and_mask);
6336 amdgpu_ring_write(ring, ref_and_mask);
6337 amdgpu_ring_write(ring, 0x20); /* poll interval */
/* Emit VS_PARTIAL_FLUSH followed by VGT_FLUSH events (two EVENT_WRITE
 * packets) to drain the geometry pipeline. */
6340 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6342 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6343 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6346 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6347 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
/*
 * gfx_v8_0_ring_emit_ib_gfx() - emit an indirect buffer on the gfx ring.
 * CE IBs use INDIRECT_BUFFER_CONST; under SR-IOV, preemptible DE IBs get
 * the PRE_ENB bit plus de-meta init so the IB can be preempted safely.
 */
6351 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6352 struct amdgpu_ib *ib,
6353 unsigned vmid, bool ctx_switch)
6355 u32 header, control = 0;
6357 if (ib->flags & AMDGPU_IB_FLAG_CE)
6358 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6360 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
/* length and VMID are packed into the control dword */
6362 control |= ib->length_dw | (vmid << 24);
6364 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6365 control |= INDIRECT_BUFFER_PRE_ENB(1);
6367 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6368 gfx_v8_0_ring_emit_de_meta(ring);
6371 amdgpu_ring_write(ring, header);
6372 amdgpu_ring_write(ring,
/* IB base must be 4-byte aligned; low bits carry packet flags */
6376 (ib->gpu_addr & 0xFFFFFFFC));
6377 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6378 amdgpu_ring_write(ring, control);
/* Compute-ring IB emit: always a plain INDIRECT_BUFFER with the VALID bit,
 * length, and VMID packed into the control dword. */
6381 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6382 struct amdgpu_ib *ib,
6383 unsigned vmid, bool ctx_switch)
6385 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6387 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6388 amdgpu_ring_write(ring,
6392 (ib->gpu_addr & 0xFFFFFFFC));
6393 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6394 amdgpu_ring_write(ring, control);
/*
 * gfx_v8_0_ring_emit_fence_gfx() - emit an EOP fence on the gfx ring.
 * The EVENT_WRITE_EOP flushes L1/TC caches, writes @seq (32 or 64 bit per
 * AMDGPU_FENCE_FLAG_64BIT) to @addr, and optionally raises an interrupt
 * (AMDGPU_FENCE_FLAG_INT).
 */
6397 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6398 u64 seq, unsigned flags)
6400 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6401 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6403 /* EVENT_WRITE_EOP - flush caches, send int */
6404 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6405 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6407 EOP_TC_WB_ACTION_EN |
6408 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
/* fence address must be 4-byte aligned */
6410 amdgpu_ring_write(ring, addr & 0xfffffffc);
6411 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6412 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6413 amdgpu_ring_write(ring, lower_32_bits(seq));
6414 amdgpu_ring_write(ring, upper_32_bits(seq));
/*
 * gfx_v8_0_ring_emit_pipeline_sync() - wait until this ring's own fence
 * memory reaches sync_seq before continuing.  GFX rings wait on the PFP so
 * prefetch stalls too; compute rings wait on the ME.
 */
6418 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6420 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6421 uint32_t seq = ring->fence_drv.sync_seq;
6422 uint64_t addr = ring->fence_drv.gpu_addr;
6424 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6425 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6426 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6427 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6428 amdgpu_ring_write(ring, addr & 0xfffffffc);
6429 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6430 amdgpu_ring_write(ring, seq);
6431 amdgpu_ring_write(ring, 0xffffffff);
6432 amdgpu_ring_write(ring, 4); /* poll interval */
/*
 * gfx_v8_0_ring_emit_vm_flush() - emit a GPUVM TLB flush for @vmid.
 * Delegates the actual flush to the GMC helper, then polls
 * VM_INVALIDATE_REQUEST until it reads back 0 (invalidate complete).
 * On the gfx ring a PFP_SYNC_ME follows so the prefetcher doesn't run
 * ahead with stale translations.
 */
6435 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6436 unsigned vmid, uint64_t pd_addr)
6438 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6440 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6442 /* wait for the invalidate to complete */
6443 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6444 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6445 WAIT_REG_MEM_FUNCTION(0) | /* always */
6446 WAIT_REG_MEM_ENGINE(0))); /* me */
6447 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6448 amdgpu_ring_write(ring, 0);
6449 amdgpu_ring_write(ring, 0); /* ref */
6450 amdgpu_ring_write(ring, 0); /* mask */
6451 amdgpu_ring_write(ring, 0x20); /* poll interval */
6453 /* compute doesn't have PFP */
6455 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6456 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6457 amdgpu_ring_write(ring, 0x0);
/* Compute rings always use the writeback slot for the write pointer. */
6461 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6463 return ring->adev->wb.wb[ring->wptr_offs];
/* Publish the compute write pointer: update writeback, then ring the
 * doorbell (compute queues are always doorbell-driven on VI). */
6466 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6468 struct amdgpu_device *adev = ring->adev;
6470 /* XXX check if swapping is necessary on BE */
6471 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6472 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
/*
 * gfx_v8_0_ring_set_pipe_percent() - throttle or restore a pipe's SPI
 * wave-launch budget.  acquire=true grants the full budget (VALUE mask),
 * false throttles the pipe to the minimum (0x1).  NOTE(review): the
 * RREG32/WREG32 around the REG_SET_FIELD and the first-ME pipe_num clamp
 * are missing from this listing.
 */
6475 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6478 struct amdgpu_device *adev = ring->adev;
6479 int pipe_num, tmp, reg;
6480 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6482 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6484 /* first me only has 2 entries, GFX and HP3D */
6488 reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6490 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
/* Track per-pipe reservations made by high-priority compute rings and
 * rebalance SPI pipe percentages, all under gfx.pipe_reserve_mutex.
 * If the reservation bitmap ends up empty, every GFX and compute ring
 * is restored to full resources; otherwise only pipes holding a
 * reservation keep full resources and all others are lowered. */
6494 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6495 struct amdgpu_ring *ring,
6500 struct amdgpu_ring *iring;
6502 mutex_lock(&adev->gfx.pipe_reserve_mutex);
6503 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
/* acquire sets the pipe's bit, release clears it (the selecting
 * branch is not visible in this listing) */
6505 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6507 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6509 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6510 /* Clear all reservations - everyone reacquires all resources */
6511 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6512 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6515 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6516 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6519 /* Lower all pipes without a current reservation */
6520 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6521 iring = &adev->gfx.gfx_ring[i];
6522 pipe = amdgpu_gfx_queue_to_bit(adev,
6526 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6527 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6530 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6531 iring = &adev->gfx.compute_ring[i];
6532 pipe = amdgpu_gfx_queue_to_bit(adev,
6536 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6537 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6541 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
/* Program the HQD pipe/queue priority registers for @ring's queue:
 * 0x2/0xf on acquire, 0x0/0x0 on release.  The per-queue register
 * writes are bracketed by SRBM select/deselect under srbm_mutex. */
6544 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6545 struct amdgpu_ring *ring,
6548 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6549 uint32_t queue_priority = acquire ? 0xf : 0x0;
6551 mutex_lock(&adev->srbm_mutex);
6552 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6554 WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6555 WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6557 vi_srbm_select(adev, 0, 0, 0, 0);
6558 mutex_unlock(&adev->srbm_mutex);
/* amdgpu_ring_funcs.set_priority callback for compute rings.
 * DRM_SCHED_PRIORITY_HIGH_HW maps to "acquire" (raise HQD priority and
 * reserve the pipe); anything else releases.  No-op for non-compute
 * rings. */
6560 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6561 enum drm_sched_priority priority)
6563 struct amdgpu_device *adev = ring->adev;
6564 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6566 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6569 gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6570 gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
/* Emit a fence on a compute ring via RELEASE_MEM: flush TCL1/TC caches
 * on EOP, write the (32- or 64-bit) seq to addr and optionally raise an
 * interrupt, depending on AMDGPU_FENCE_FLAG_64BIT / _INT in @flags. */
6573 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6577 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6578 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6580 /* RELEASE_MEM - flush caches, send int */
6581 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6582 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6584 EOP_TC_WB_ACTION_EN |
6585 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6587 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6588 amdgpu_ring_write(ring, addr & 0xfffffffc);
6589 amdgpu_ring_write(ring, upper_32_bits(addr));
6590 amdgpu_ring_write(ring, lower_32_bits(seq));
6591 amdgpu_ring_write(ring, upper_32_bits(seq));
/* Emit a fence on the KIQ ring: WRITE_DATA of the 32-bit seq to the
 * writeback address, then (if AMDGPU_FENCE_FLAG_INT) a second
 * WRITE_DATA to CPC_INT_STATUS to trigger the GENERIC2 interrupt.
 * 64-bit fences are not supported here (only 32 bits of wb space are
 * allocated per seq), hence the BUG_ON. */
6594 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6595 u64 seq, unsigned int flags)
6597 /* we only allocate 32bit for each seq wb address */
6598 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6600 /* write fence seq to the "addr" */
6601 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6602 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6603 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6604 amdgpu_ring_write(ring, lower_32_bits(addr));
6605 amdgpu_ring_write(ring, upper_32_bits(addr));
6606 amdgpu_ring_write(ring, lower_32_bits(seq));
6608 if (flags & AMDGPU_FENCE_FLAG_INT) {
6609 /* set register to trigger INT */
6610 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6611 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6612 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6613 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6614 amdgpu_ring_write(ring, 0);
6615 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
/* Emit a SWITCH_BUFFER packet (used for CE/DE buffer switching). */
6619 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6621 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6622 amdgpu_ring_write(ring, 0);
/* Emit a CONTEXT_CONTROL packet whose load bits (dw2) depend on @flags:
 * context switch triggers a VGT flush and full state reload; preamble
 * presence controls CE RAM loading.  Under SR-IOV a CE meta-data write
 * is emitted first. */
6625 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6629 if (amdgpu_sriov_vf(ring->adev))
6630 gfx_v8_0_ring_emit_ce_meta(ring);
6632 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6633 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6634 gfx_v8_0_ring_emit_vgt_flush(ring);
6635 /* set load_global_config & load_global_uconfig */
6637 /* set load_cs_sh_regs */
6639 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6642 /* set load_ce_ram if preamble presented */
6643 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6646 /* still load_ce_ram if this is the first time preamble presented
6647 * although there is no context switch happens.
6649 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6653 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6654 amdgpu_ring_write(ring, dw2);
6655 amdgpu_ring_write(ring, 0);
6658 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6662 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6663 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6664 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6665 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6666 ret = ring->wptr & ring->buf_mask;
6667 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6671 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6675 BUG_ON(offset > ring->buf_mask);
6676 BUG_ON(ring->ring[offset] != 0x55aa55aa);
6678 cur = (ring->wptr & ring->buf_mask) - 1;
6679 if (likely(cur > offset))
6680 ring->ring[offset] = cur - offset;
6682 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6685 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6687 struct amdgpu_device *adev = ring->adev;
6689 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6690 amdgpu_ring_write(ring, 0 | /* src: register*/
6691 (5 << 8) | /* dst: memory */
6692 (1 << 20)); /* write confirm */
6693 amdgpu_ring_write(ring, reg);
6694 amdgpu_ring_write(ring, 0);
6695 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6696 adev->virt.reg_val_offs * 4));
6697 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6698 adev->virt.reg_val_offs * 4));
6701 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6706 switch (ring->funcs->type) {
6707 case AMDGPU_RING_TYPE_GFX:
6708 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6710 case AMDGPU_RING_TYPE_KIQ:
6711 cmd = 1 << 16; /* no inc addr */
6718 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6719 amdgpu_ring_write(ring, cmd);
6720 amdgpu_ring_write(ring, reg);
6721 amdgpu_ring_write(ring, 0);
6722 amdgpu_ring_write(ring, val);
/* Enable/disable the GFX ring EOP (timestamp) interrupt. */
6725 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6726 enum amdgpu_interrupt_state state)
6728 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6729 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6732 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6734 enum amdgpu_interrupt_state state)
6736 u32 mec_int_cntl, mec_int_cntl_reg;
6739 * amdgpu controls only the first MEC. That's why this function only
6740 * handles the setting of interrupts for this specific MEC. All other
6741 * pipes' interrupts are set by amdkfd.
6747 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6750 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6753 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6756 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6759 DRM_DEBUG("invalid pipe %d\n", pipe);
6763 DRM_DEBUG("invalid me %d\n", me);
6768 case AMDGPU_IRQ_STATE_DISABLE:
6769 mec_int_cntl = RREG32(mec_int_cntl_reg);
6770 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6771 WREG32(mec_int_cntl_reg, mec_int_cntl);
6773 case AMDGPU_IRQ_STATE_ENABLE:
6774 mec_int_cntl = RREG32(mec_int_cntl_reg);
6775 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6776 WREG32(mec_int_cntl_reg, mec_int_cntl);
6783 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6784 struct amdgpu_irq_src *source,
6786 enum amdgpu_interrupt_state state)
6788 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6789 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6794 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6795 struct amdgpu_irq_src *source,
6797 enum amdgpu_interrupt_state state)
6799 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6800 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6805 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6806 struct amdgpu_irq_src *src,
6808 enum amdgpu_interrupt_state state)
6811 case AMDGPU_CP_IRQ_GFX_EOP:
6812 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6814 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6815 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6817 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6818 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6820 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6821 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6823 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6824 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6826 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6827 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6829 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6830 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6832 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6833 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6835 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6836 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6844 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6845 struct amdgpu_irq_src *source,
6847 enum amdgpu_interrupt_state state)
6852 case AMDGPU_IRQ_STATE_DISABLE:
6856 case AMDGPU_IRQ_STATE_ENABLE:
6864 WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6865 WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6866 WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6867 WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6868 WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6869 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6871 WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6873 WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6875 WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6877 WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6879 WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6881 WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6883 WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6889 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6890 struct amdgpu_irq_src *source,
6892 enum amdgpu_interrupt_state state)
6897 case AMDGPU_IRQ_STATE_DISABLE:
6901 case AMDGPU_IRQ_STATE_ENABLE:
6909 WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6915 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6916 struct amdgpu_irq_src *source,
6917 struct amdgpu_iv_entry *entry)
6920 u8 me_id, pipe_id, queue_id;
6921 struct amdgpu_ring *ring;
6923 DRM_DEBUG("IH: CP EOP\n");
6924 me_id = (entry->ring_id & 0x0c) >> 2;
6925 pipe_id = (entry->ring_id & 0x03) >> 0;
6926 queue_id = (entry->ring_id & 0x70) >> 4;
6930 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6934 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6935 ring = &adev->gfx.compute_ring[i];
6936 /* Per-queue interrupt is supported for MEC starting from VI.
6937 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6939 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6940 amdgpu_fence_process(ring);
6947 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6948 struct amdgpu_irq_src *source,
6949 struct amdgpu_iv_entry *entry)
6951 DRM_ERROR("Illegal register access in command stream\n");
6952 schedule_work(&adev->reset_work);
6956 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6957 struct amdgpu_irq_src *source,
6958 struct amdgpu_iv_entry *entry)
6960 DRM_ERROR("Illegal instruction in command stream\n");
6961 schedule_work(&adev->reset_work);
6965 static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6966 struct amdgpu_irq_src *source,
6967 struct amdgpu_iv_entry *entry)
6969 DRM_ERROR("CP EDC/ECC error detected.");
/* Decode an SQ interrupt payload word and log its contents.
 * ih_data's ENCODING field selects the payload layout: "auto"
 * (general-purpose status bits), or a wave encoding (instruction trap /
 * EDC-ECC error) carrying se/sh/cu/simd/wave/vm ids.  For wave
 * encodings the EDC source is read from SQ_EDC_INFO via GRBM-indexed
 * access under grbm_idx_mutex; that register is only reachable from
 * bottom-half context, so sq_edc_source stays -1 ("unavailable") when
 * called directly from the ISR.
 *
 * Fix: the invalid-encoding message had its period after the newline
 * ("...type\n."), which dumped a stray '.' onto the next log line.
 */
6973 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6975 u32 enc, se_id, sh_id, cu_id;
6977 int sq_edc_source = -1;
6979 enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6980 se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6984 DRM_INFO("SQ general purpose intr detected:"
6985 "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6986 "host_cmd_overflow %d, cmd_timestamp %d,"
6987 "reg_timestamp %d, thread_trace_buff_full %d,"
6988 "wlt %d, thread_trace %d.\n",
6990 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6991 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6992 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6993 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6994 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6995 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6996 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6997 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
7003 cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
7004 sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
7007 * This function can be called either directly from ISR
7008 * or from BH in which case we can access SQ_EDC_INFO
7012 mutex_lock(&adev->grbm_idx_mutex);
7013 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
7015 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
7017 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7018 mutex_unlock(&adev->grbm_idx_mutex);
7022 sprintf(type, "instruction intr");
7024 sprintf(type, "EDC/ECC error");
7028 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
7029 "trap %s, sq_ed_info.source %s.\n",
7030 type, se_id, sh_id, cu_id,
7031 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
7032 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
7033 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
7034 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
7035 (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
7039 DRM_ERROR("SQ invalid encoding type.\n");
7043 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
7046 struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
7047 struct sq_work *sq_work = container_of(work, struct sq_work, work);
7049 gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
7052 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
7053 struct amdgpu_irq_src *source,
7054 struct amdgpu_iv_entry *entry)
7056 unsigned ih_data = entry->src_data[0];
7059 * Try to submit work so SQ_EDC_INFO can be accessed from
7060 * BH. If previous work submission hasn't finished yet
7061 * just print whatever info is possible directly from the ISR.
7063 if (work_pending(&adev->gfx.sq_work.work)) {
7064 gfx_v8_0_parse_sq_irq(adev, ih_data);
7066 adev->gfx.sq_work.ih_data = ih_data;
7067 schedule_work(&adev->gfx.sq_work.work);
7073 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
7074 struct amdgpu_irq_src *src,
7076 enum amdgpu_interrupt_state state)
7078 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7081 case AMDGPU_CP_KIQ_IRQ_DRIVER0:
7082 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
7083 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7085 WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
7087 GENERIC2_INT_ENABLE,
7088 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7090 WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
7092 GENERIC2_INT_ENABLE,
7093 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7096 BUG(); /* kiq only support GENERIC2_INT now */
7102 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7103 struct amdgpu_irq_src *source,
7104 struct amdgpu_iv_entry *entry)
7106 u8 me_id, pipe_id, queue_id;
7107 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7109 me_id = (entry->ring_id & 0x0c) >> 2;
7110 pipe_id = (entry->ring_id & 0x03) >> 0;
7111 queue_id = (entry->ring_id & 0x70) >> 4;
7112 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7113 me_id, pipe_id, queue_id);
7115 amdgpu_fence_process(ring);
7119 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
7121 .early_init = gfx_v8_0_early_init,
7122 .late_init = gfx_v8_0_late_init,
7123 .sw_init = gfx_v8_0_sw_init,
7124 .sw_fini = gfx_v8_0_sw_fini,
7125 .hw_init = gfx_v8_0_hw_init,
7126 .hw_fini = gfx_v8_0_hw_fini,
7127 .suspend = gfx_v8_0_suspend,
7128 .resume = gfx_v8_0_resume,
7129 .is_idle = gfx_v8_0_is_idle,
7130 .wait_for_idle = gfx_v8_0_wait_for_idle,
7131 .check_soft_reset = gfx_v8_0_check_soft_reset,
7132 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
7133 .soft_reset = gfx_v8_0_soft_reset,
7134 .post_soft_reset = gfx_v8_0_post_soft_reset,
7135 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
7136 .set_powergating_state = gfx_v8_0_set_powergating_state,
7137 .get_clockgating_state = gfx_v8_0_get_clockgating_state,
7140 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
7141 .type = AMDGPU_RING_TYPE_GFX,
7143 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7144 .support_64bit_ptrs = false,
7145 .get_rptr = gfx_v8_0_ring_get_rptr,
7146 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
7147 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
7148 .emit_frame_size = /* maximum 215dw if count 16 IBs in */
7150 7 + /* PIPELINE_SYNC */
7151 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
7152 8 + /* FENCE for VM_FLUSH */
7153 20 + /* GDS switch */
7154 4 + /* double SWITCH_BUFFER,
7155 the first COND_EXEC jump to the place just
7156 prior to this double SWITCH_BUFFER */
7164 8 + 8 + /* FENCE x2 */
7165 2, /* SWITCH_BUFFER */
7166 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
7167 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
7168 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
7169 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7170 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7171 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7172 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7173 .test_ring = gfx_v8_0_ring_test_ring,
7174 .test_ib = gfx_v8_0_ring_test_ib,
7175 .insert_nop = amdgpu_ring_insert_nop,
7176 .pad_ib = amdgpu_ring_generic_pad_ib,
7177 .emit_switch_buffer = gfx_v8_ring_emit_sb,
7178 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
7179 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
7180 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
7181 .emit_wreg = gfx_v8_0_ring_emit_wreg,
7184 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
7185 .type = AMDGPU_RING_TYPE_COMPUTE,
7187 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7188 .support_64bit_ptrs = false,
7189 .get_rptr = gfx_v8_0_ring_get_rptr,
7190 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7191 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7193 20 + /* gfx_v8_0_ring_emit_gds_switch */
7194 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7195 5 + /* hdp_invalidate */
7196 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7197 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
7198 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7199 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7200 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7201 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
7202 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7203 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7204 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7205 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7206 .test_ring = gfx_v8_0_ring_test_ring,
7207 .test_ib = gfx_v8_0_ring_test_ib,
7208 .insert_nop = amdgpu_ring_insert_nop,
7209 .pad_ib = amdgpu_ring_generic_pad_ib,
7210 .set_priority = gfx_v8_0_ring_set_priority_compute,
7211 .emit_wreg = gfx_v8_0_ring_emit_wreg,
7214 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7215 .type = AMDGPU_RING_TYPE_KIQ,
7217 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7218 .support_64bit_ptrs = false,
7219 .get_rptr = gfx_v8_0_ring_get_rptr,
7220 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7221 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7223 20 + /* gfx_v8_0_ring_emit_gds_switch */
7224 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7225 5 + /* hdp_invalidate */
7226 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7227 17 + /* gfx_v8_0_ring_emit_vm_flush */
7228 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7229 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7230 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7231 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7232 .test_ring = gfx_v8_0_ring_test_ring,
7233 .test_ib = gfx_v8_0_ring_test_ib,
7234 .insert_nop = amdgpu_ring_insert_nop,
7235 .pad_ib = amdgpu_ring_generic_pad_ib,
7236 .emit_rreg = gfx_v8_0_ring_emit_rreg,
7237 .emit_wreg = gfx_v8_0_ring_emit_wreg,
7240 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7244 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7246 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7247 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7249 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7250 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7253 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7254 .set = gfx_v8_0_set_eop_interrupt_state,
7255 .process = gfx_v8_0_eop_irq,
7258 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7259 .set = gfx_v8_0_set_priv_reg_fault_state,
7260 .process = gfx_v8_0_priv_reg_irq,
7263 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7264 .set = gfx_v8_0_set_priv_inst_fault_state,
7265 .process = gfx_v8_0_priv_inst_irq,
7268 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7269 .set = gfx_v8_0_kiq_set_interrupt_state,
7270 .process = gfx_v8_0_kiq_irq,
7273 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7274 .set = gfx_v8_0_set_cp_ecc_int_state,
7275 .process = gfx_v8_0_cp_ecc_error_irq,
7278 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7279 .set = gfx_v8_0_set_sq_int_state,
7280 .process = gfx_v8_0_sq_irq,
7283 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7285 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7286 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7288 adev->gfx.priv_reg_irq.num_types = 1;
7289 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7291 adev->gfx.priv_inst_irq.num_types = 1;
7292 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7294 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7295 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7297 adev->gfx.cp_ecc_error_irq.num_types = 1;
7298 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7300 adev->gfx.sq_irq.num_types = 1;
7301 adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7304 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7306 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7309 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7311 /* init asci gds info */
7312 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7313 adev->gds.gws.total_size = 64;
7314 adev->gds.oa.total_size = 16;
7316 if (adev->gds.mem.total_size == 64 * 1024) {
7317 adev->gds.mem.gfx_partition_size = 4096;
7318 adev->gds.mem.cs_partition_size = 4096;
7320 adev->gds.gws.gfx_partition_size = 4;
7321 adev->gds.gws.cs_partition_size = 4;
7323 adev->gds.oa.gfx_partition_size = 4;
7324 adev->gds.oa.cs_partition_size = 1;
7326 adev->gds.mem.gfx_partition_size = 1024;
7327 adev->gds.mem.cs_partition_size = 1024;
7329 adev->gds.gws.gfx_partition_size = 16;
7330 adev->gds.gws.cs_partition_size = 16;
7332 adev->gds.oa.gfx_partition_size = 4;
7333 adev->gds.oa.cs_partition_size = 4;
7337 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7345 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7346 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7348 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7351 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7355 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7356 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7358 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7360 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7363 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7365 int i, j, k, counter, active_cu_number = 0;
7366 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7367 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7368 unsigned disable_masks[4 * 2];
7371 memset(cu_info, 0, sizeof(*cu_info));
7373 if (adev->flags & AMD_IS_APU)
7376 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7378 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7380 mutex_lock(&adev->grbm_idx_mutex);
7381 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7382 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7386 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7388 gfx_v8_0_set_user_cu_inactive_bitmap(
7389 adev, disable_masks[i * 2 + j]);
7390 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7391 cu_info->bitmap[i][j] = bitmap;
7393 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7394 if (bitmap & mask) {
7395 if (counter < ao_cu_num)
7401 active_cu_number += counter;
7403 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7404 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7407 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7408 mutex_unlock(&adev->grbm_idx_mutex);
7410 cu_info->number = active_cu_number;
7411 cu_info->ao_cu_mask = ao_cu_mask;
7412 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7413 cu_info->max_waves_per_simd = 10;
7414 cu_info->max_scratch_slots_per_cu = 32;
7415 cu_info->wave_front_size = 64;
7416 cu_info->lds_size = 64;
7419 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7421 .type = AMD_IP_BLOCK_TYPE_GFX,
7425 .funcs = &gfx_v8_0_ip_funcs,
7428 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7430 .type = AMD_IP_BLOCK_TYPE_GFX,
7434 .funcs = &gfx_v8_0_ip_funcs,
7437 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7439 uint64_t ce_payload_addr;
7442 struct vi_ce_ib_state regular;
7443 struct vi_ce_ib_state_chained_ib chained;
7446 if (ring->adev->virt.chained_ib_support) {
7447 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7448 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7449 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7451 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7452 offsetof(struct vi_gfx_meta_data, ce_payload);
7453 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7456 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7457 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7458 WRITE_DATA_DST_SEL(8) |
7460 WRITE_DATA_CACHE_POLICY(0));
7461 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7462 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7463 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7466 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7468 uint64_t de_payload_addr, gds_addr, csa_addr;
7471 struct vi_de_ib_state regular;
7472 struct vi_de_ib_state_chained_ib chained;
7475 csa_addr = amdgpu_csa_vaddr(ring->adev);
7476 gds_addr = csa_addr + 4096;
7477 if (ring->adev->virt.chained_ib_support) {
7478 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7479 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7480 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7481 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7483 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7484 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7485 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7486 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7489 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7490 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7491 WRITE_DATA_DST_SEL(8) |
7493 WRITE_DATA_CACHE_POLICY(0));
7494 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7495 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7496 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);