2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include <linux/firmware.h>
26 #include "amdgpu_gfx.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
51 #include "smu/smu_7_1_3_d.h"
/* Ring counts exposed by the GFX v8 block: one graphics ring, eight compute rings. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Golden (recommended) GB_ADDR_CONFIG values, per ASIC. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Field-placement helpers for GB_TILE_MODE* / GB_MACROTILE_MODE* registers. */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-unit override bits in RLC_CGTT_MGCG_OVERRIDE. */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK		0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK		0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK		0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK		0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK		0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK		0x00000020L

/* Commands for the RLC serdes interface: set or clear a BPM register. */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	/* NOTE(review): the source was truncated here; upstream also defines a
	 * final BPM_REG_FGCG_MAX sentinel enumerator — confirm against upstream. */
	BPM_REG_FGCG_MAX
};

/* Length of the RLC "direct register list" format block in the clear-state
 * buffer, in dwords. */
#define RLC_FormatDirectRegListLength        14
94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
141 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
143 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
144 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
145 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
146 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
147 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
148 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
149 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
150 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
151 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
152 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
153 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
154 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
155 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
156 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
157 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
158 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
161 static const u32 golden_settings_tonga_a11[] =
163 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
164 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
165 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
166 mmGB_GPU_ID, 0x0000000f, 0x00000000,
167 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
168 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
169 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
170 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
171 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
172 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
173 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
174 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
175 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
176 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
177 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
180 static const u32 tonga_golden_common_all[] =
182 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
183 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
184 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
185 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
186 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
187 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
188 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
189 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
192 static const u32 tonga_mgcg_cgcg_init[] =
194 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
195 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
196 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
197 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
198 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
199 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
200 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
201 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
202 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
203 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
204 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
205 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
206 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
207 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
208 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
209 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
210 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
211 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
212 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
213 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
214 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
215 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
216 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
217 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
218 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
219 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
220 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
221 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
222 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
223 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
224 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
225 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
226 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
227 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
228 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
229 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
230 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
231 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
232 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
233 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
234 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
235 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
236 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
237 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
238 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
239 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
240 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
241 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
242 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
243 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
244 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
245 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
246 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
247 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
248 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
249 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
250 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
251 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
252 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
253 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
254 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
255 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
256 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
257 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
258 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
259 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
260 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
261 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
262 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
263 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
264 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
265 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
266 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
267 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
268 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
271 static const u32 golden_settings_polaris11_a11[] =
273 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
274 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
275 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
276 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
277 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
278 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
279 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
280 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
281 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
282 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
283 mmSQ_CONFIG, 0x07f80000, 0x01180000,
284 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
285 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
286 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
287 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
288 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
289 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
292 static const u32 polaris11_golden_common_all[] =
294 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
295 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
296 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
297 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
298 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
299 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
302 static const u32 golden_settings_polaris10_a11[] =
304 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
305 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
306 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
307 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
308 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
309 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
310 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
311 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
312 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
313 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
314 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
315 mmSQ_CONFIG, 0x07f80000, 0x07180000,
316 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
317 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
318 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
319 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
320 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
323 static const u32 polaris10_golden_common_all[] =
325 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
326 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
327 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
328 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
329 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
330 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
331 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
332 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
335 static const u32 fiji_golden_common_all[] =
337 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
338 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
339 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
340 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
341 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
342 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
343 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
344 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
345 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
346 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
349 static const u32 golden_settings_fiji_a10[] =
351 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
352 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
353 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
354 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
355 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
356 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
357 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
358 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
359 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
360 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
361 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
364 static const u32 fiji_mgcg_cgcg_init[] =
366 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
367 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
368 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
369 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
370 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
371 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
372 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
373 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
374 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
375 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
376 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
377 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
378 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
379 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
380 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
381 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
382 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
383 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
384 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
385 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
386 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
387 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
388 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
389 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
390 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
391 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
392 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
393 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
394 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
395 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
396 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
397 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
398 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
399 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
400 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
403 static const u32 golden_settings_iceland_a11[] =
405 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
406 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
407 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
408 mmGB_GPU_ID, 0x0000000f, 0x00000000,
409 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
410 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
411 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
412 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
413 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
414 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
415 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
416 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
417 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
418 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
419 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
420 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
423 static const u32 iceland_golden_common_all[] =
425 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
426 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
427 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
428 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
429 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
430 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
431 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
432 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
435 static const u32 iceland_mgcg_cgcg_init[] =
437 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
438 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
439 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
440 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
441 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
442 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
443 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
444 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
445 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
446 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
447 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
448 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
449 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
450 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
451 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
452 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
453 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
454 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
455 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
456 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
457 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
458 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
459 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
460 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
461 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
462 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
463 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
464 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
465 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
466 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
467 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
468 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
469 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
470 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
471 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
472 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
473 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
474 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
475 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
476 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
477 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
478 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
479 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
480 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
481 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
482 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
483 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
484 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
485 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
486 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
487 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
488 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
489 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
490 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
491 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
492 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
493 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
494 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
495 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
496 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
497 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
498 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
499 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
500 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
503 static const u32 cz_golden_settings_a11[] =
505 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
506 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
507 mmGB_GPU_ID, 0x0000000f, 0x00000000,
508 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
509 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
510 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
511 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
512 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
513 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
514 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
515 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
516 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
519 static const u32 cz_golden_common_all[] =
521 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
522 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
523 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
524 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
525 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
526 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
527 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
528 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
531 static const u32 cz_mgcg_cgcg_init[] =
533 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
534 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
535 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
536 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
537 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
538 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
539 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
540 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
541 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
542 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
543 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
544 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
545 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
546 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
547 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
548 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
549 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
550 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
551 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
552 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
553 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
554 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
555 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
556 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
557 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
558 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
559 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
560 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
561 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
562 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
563 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
564 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
565 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
566 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
567 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
568 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
569 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
570 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
571 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
572 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
573 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
574 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
575 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
576 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
577 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
578 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
579 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
580 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
581 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
582 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
583 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
584 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
585 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
586 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
587 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
588 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
589 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
590 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
591 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
592 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
593 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
594 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
595 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
596 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
597 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
598 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
599 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
600 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
601 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
602 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
603 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
604 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
605 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
606 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
607 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
610 static const u32 stoney_golden_settings_a11[] =
612 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
613 mmGB_GPU_ID, 0x0000000f, 0x00000000,
614 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
615 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
616 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
617 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
618 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
619 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
620 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
621 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
624 static const u32 stoney_golden_common_all[] =
626 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
627 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
628 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
629 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
630 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
631 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
632 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
633 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
636 static const u32 stoney_mgcg_cgcg_init[] =
638 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
639 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
640 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
641 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
642 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
643 mmATC_MISC_CG, 0xffffffff, 0x000c0200,
646 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
647 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
648 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
649 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
650 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
651 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
653 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
655 switch (adev->asic_type) {
657 amdgpu_program_register_sequence(adev,
658 iceland_mgcg_cgcg_init,
659 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
660 amdgpu_program_register_sequence(adev,
661 golden_settings_iceland_a11,
662 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
663 amdgpu_program_register_sequence(adev,
664 iceland_golden_common_all,
665 (const u32)ARRAY_SIZE(iceland_golden_common_all));
668 amdgpu_program_register_sequence(adev,
670 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
671 amdgpu_program_register_sequence(adev,
672 golden_settings_fiji_a10,
673 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
674 amdgpu_program_register_sequence(adev,
675 fiji_golden_common_all,
676 (const u32)ARRAY_SIZE(fiji_golden_common_all));
680 amdgpu_program_register_sequence(adev,
681 tonga_mgcg_cgcg_init,
682 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
683 amdgpu_program_register_sequence(adev,
684 golden_settings_tonga_a11,
685 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
686 amdgpu_program_register_sequence(adev,
687 tonga_golden_common_all,
688 (const u32)ARRAY_SIZE(tonga_golden_common_all));
691 amdgpu_program_register_sequence(adev,
692 golden_settings_polaris11_a11,
693 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
694 amdgpu_program_register_sequence(adev,
695 polaris11_golden_common_all,
696 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
699 amdgpu_program_register_sequence(adev,
700 golden_settings_polaris10_a11,
701 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
702 amdgpu_program_register_sequence(adev,
703 polaris10_golden_common_all,
704 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
705 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
706 if (adev->pdev->revision == 0xc7 &&
707 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
708 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
709 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
710 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
711 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
715 amdgpu_program_register_sequence(adev,
717 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
718 amdgpu_program_register_sequence(adev,
719 cz_golden_settings_a11,
720 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
721 amdgpu_program_register_sequence(adev,
722 cz_golden_common_all,
723 (const u32)ARRAY_SIZE(cz_golden_common_all));
726 amdgpu_program_register_sequence(adev,
727 stoney_mgcg_cgcg_init,
728 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
729 amdgpu_program_register_sequence(adev,
730 stoney_golden_settings_a11,
731 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
732 amdgpu_program_register_sequence(adev,
733 stoney_golden_common_all,
734 (const u32)ARRAY_SIZE(stoney_golden_common_all));
741 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
745 adev->gfx.scratch.num_reg = 7;
746 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
747 for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
748 adev->gfx.scratch.free[i] = true;
749 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
/* gfx_v8_0_ring_test_ring - basic sanity test of a CP ring.
 *
 * Grabs a scratch register, seeds it with the sentinel 0xCAFEDEAD, then
 * submits a 3-dword SET_UCONFIG_REG packet that makes the CP write
 * 0xDEADBEEF into it.  The CPU busy-polls the register (up to
 * adev->usec_timeout iterations) to confirm the GPU executed the packet.
 */
753 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
755 struct amdgpu_device *adev = ring->adev;
761 r = amdgpu_gfx_scratch_get(adev, &scratch);
763 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
/* Seed with a sentinel so we can detect whether the CP write landed. */
766 WREG32(scratch, 0xCAFEDEAD);
767 r = amdgpu_ring_alloc(ring, 3);
769 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
771 amdgpu_gfx_scratch_free(adev, scratch);
/* PKT3 SET_UCONFIG_REG: have the CP write 0xDEADBEEF to the scratch reg. */
774 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
775 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
776 amdgpu_ring_write(ring, 0xDEADBEEF);
777 amdgpu_ring_commit(ring);
/* Busy-poll until the GPU write becomes visible or we time out. */
779 for (i = 0; i < adev->usec_timeout; i++) {
780 tmp = RREG32(scratch);
781 if (tmp == 0xDEADBEEF)
/* i < usec_timeout means the loop broke out early, i.e. success. */
785 if (i < adev->usec_timeout) {
786 DRM_INFO("ring test on %d succeeded in %d usecs\n",
789 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
790 ring->idx, scratch, tmp);
793 amdgpu_gfx_scratch_free(adev, scratch);
/* gfx_v8_0_ring_test_ib - sanity test of indirect-buffer submission.
 *
 * Same idea as the ring test, but the SET_UCONFIG_REG packet is placed
 * in an IB and scheduled through the normal submission path; completion
 * is detected by waiting on the returned fence (bounded by @timeout)
 * instead of busy-polling, then the scratch register is read back.
 */
797 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
799 struct amdgpu_device *adev = ring->adev;
801 struct fence *f = NULL;
806 r = amdgpu_gfx_scratch_get(adev, &scratch);
808 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
/* Sentinel value; overwritten by the IB if the GPU executes it. */
811 WREG32(scratch, 0xCAFEDEAD);
812 memset(&ib, 0, sizeof(ib));
813 r = amdgpu_ib_get(adev, NULL, 256, &ib);
815 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
/* Build a 3-dword IB: write 0xDEADBEEF to the scratch register. */
818 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
819 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
820 ib.ptr[2] = 0xDEADBEEF;
823 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
/* Wait on the submission fence rather than polling the register. */
827 r = fence_wait_timeout(f, false, timeout);
829 DRM_ERROR("amdgpu: IB test timed out.\n");
833 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
836 tmp = RREG32(scratch);
837 if (tmp == 0xDEADBEEF) {
838 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
841 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
846 amdgpu_ib_free(adev, &ib, NULL);
849 amdgpu_gfx_scratch_free(adev, scratch);
/* gfx_v8_0_free_microcode - drop all GFX firmware references.
 *
 * Releases the PFP/ME/CE/RLC/MEC firmware images requested by
 * gfx_v8_0_init_microcode() and NULLs the pointers.  MEC2 firmware is
 * only released on ASICs that actually loaded it (Stoney and Topaz
 * never request a mec2 image), though the pointer is cleared for all.
 * Also frees the RLC register-list-format buffer allocated at init.
 */
854 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
855 release_firmware(adev->gfx.pfp_fw);
856 adev->gfx.pfp_fw = NULL;
857 release_firmware(adev->gfx.me_fw);
858 adev->gfx.me_fw = NULL;
859 release_firmware(adev->gfx.ce_fw);
860 adev->gfx.ce_fw = NULL;
861 release_firmware(adev->gfx.rlc_fw);
862 adev->gfx.rlc_fw = NULL;
863 release_firmware(adev->gfx.mec_fw);
864 adev->gfx.mec_fw = NULL;
/* Stoney/Topaz have no MEC2 firmware, so nothing to release there. */
865 if ((adev->asic_type != CHIP_STONEY) &&
866 (adev->asic_type != CHIP_TOPAZ))
867 release_firmware(adev->gfx.mec2_fw);
868 adev->gfx.mec2_fw = NULL;
/* Buffer allocated in gfx_v8_0_init_microcode() for the RLC reg lists. */
870 kfree(adev->gfx.rlc.register_list_format);
/* gfx_v8_0_init_microcode - request and validate all GFX firmware.
 *
 * Loads the PFP, ME, CE, RLC and MEC (and, where present, MEC2) images
 * named "amdgpu/<chip>_<block>.bin", validates each header, and caches
 * the ucode/feature versions.  The RLC header is parsed for the
 * save/restore and register-list metadata, and a kernel buffer is
 * allocated to hold the (endian-converted) register list format and
 * restore arrays.  When the SMU loads firmware, the ucode table and
 * total firmware size are populated as well.  On any failure every
 * firmware reference acquired so far is released.
 */
873 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
875 const char *chip_name;
878 struct amdgpu_firmware_info *info = NULL;
879 const struct common_firmware_header *header = NULL;
880 const struct gfx_firmware_header_v1_0 *cp_hdr;
881 const struct rlc_firmware_header_v2_0 *rlc_hdr;
882 unsigned int *tmp = NULL, i;
/* Pick the firmware name prefix from the ASIC type. */
886 switch (adev->asic_type) {
894 chip_name = "carrizo";
900 chip_name = "polaris11";
903 chip_name = "polaris10";
906 chip_name = "stoney";
/* PFP (prefetch parser) firmware. */
912 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
913 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
916 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
919 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
920 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
921 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* ME (micro engine) firmware. */
923 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
924 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
927 err = amdgpu_ucode_validate(adev->gfx.me_fw);
930 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
931 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
932 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* CE (constant engine) firmware. */
934 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
935 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
938 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
941 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
942 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
943 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* RLC firmware plus its save/restore and register-list metadata. */
945 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
946 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
949 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
950 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
951 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
952 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
954 adev->gfx.rlc.save_and_restore_offset =
955 le32_to_cpu(rlc_hdr->save_and_restore_offset);
956 adev->gfx.rlc.clear_state_descriptor_offset =
957 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
958 adev->gfx.rlc.avail_scratch_ram_locations =
959 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
960 adev->gfx.rlc.reg_restore_list_size =
961 le32_to_cpu(rlc_hdr->reg_restore_list_size);
962 adev->gfx.rlc.reg_list_format_start =
963 le32_to_cpu(rlc_hdr->reg_list_format_start);
964 adev->gfx.rlc.reg_list_format_separate_start =
965 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
966 adev->gfx.rlc.starting_offsets_start =
967 le32_to_cpu(rlc_hdr->starting_offsets_start);
968 adev->gfx.rlc.reg_list_format_size_bytes =
969 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
970 adev->gfx.rlc.reg_list_size_bytes =
971 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
/* Single allocation holding both the format array and restore array. */
973 adev->gfx.rlc.register_list_format =
974 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
975 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
977 if (!adev->gfx.rlc.register_list_format) {
/* Copy the (little-endian) format array out of the firmware image. */
982 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
983 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
984 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
985 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
/* Restore array lives directly after the format array in the buffer. */
987 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
989 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
990 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
991 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
992 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
/* MEC (compute micro engine) firmware. */
994 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
995 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
998 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1001 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1002 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1003 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* Stoney/Topaz have no second compute micro engine. */
1005 if ((adev->asic_type != CHIP_STONEY) &&
1006 (adev->asic_type != CHIP_TOPAZ)) {
1007 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1008 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1010 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1013 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1014 adev->gfx.mec2_fw->data;
1015 adev->gfx.mec2_fw_version =
1016 le32_to_cpu(cp_hdr->header.ucode_version);
1017 adev->gfx.mec2_feature_version =
1018 le32_to_cpu(cp_hdr->ucode_feature_version);
1021 adev->gfx.mec2_fw = NULL;
/* Populate the SMU firmware-load table and accumulate total size. */
1025 if (adev->firmware.smu_load) {
1026 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1027 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1028 info->fw = adev->gfx.pfp_fw;
1029 header = (const struct common_firmware_header *)info->fw->data;
1030 adev->firmware.fw_size +=
1031 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1033 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1034 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1035 info->fw = adev->gfx.me_fw;
1036 header = (const struct common_firmware_header *)info->fw->data;
1037 adev->firmware.fw_size +=
1038 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1040 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1041 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1042 info->fw = adev->gfx.ce_fw;
1043 header = (const struct common_firmware_header *)info->fw->data;
1044 adev->firmware.fw_size +=
1045 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1047 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1048 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1049 info->fw = adev->gfx.rlc_fw;
1050 header = (const struct common_firmware_header *)info->fw->data;
1051 adev->firmware.fw_size +=
1052 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1054 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1055 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1056 info->fw = adev->gfx.mec_fw;
1057 header = (const struct common_firmware_header *)info->fw->data;
1058 adev->firmware.fw_size +=
1059 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1061 /* we need account JT in */
1062 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1063 adev->firmware.fw_size +=
1064 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1066 if (amdgpu_sriov_vf(adev)) {
1067 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1068 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1069 info->fw = adev->gfx.mec_fw;
/* BUGFIX: 64 * PAGE_SIZE is a host-native constant, not a
 * little-endian firmware field; wrapping it in le32_to_cpu()
 * would byte-swap the size on big-endian kernels. */
1070 adev->firmware.fw_size +=
1071 ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
1074 if (adev->gfx.mec2_fw) {
1075 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1076 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1077 info->fw = adev->gfx.mec2_fw;
1078 header = (const struct common_firmware_header *)info->fw->data;
1079 adev->firmware.fw_size +=
1080 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
/* Error path: release everything acquired so far. */
1088 "gfx8: Failed to load firmware \"%s\"\n",
1090 release_firmware(adev->gfx.pfp_fw);
1091 adev->gfx.pfp_fw = NULL;
1092 release_firmware(adev->gfx.me_fw);
1093 adev->gfx.me_fw = NULL;
1094 release_firmware(adev->gfx.ce_fw);
1095 adev->gfx.ce_fw = NULL;
1096 release_firmware(adev->gfx.rlc_fw);
1097 adev->gfx.rlc_fw = NULL;
1098 release_firmware(adev->gfx.mec_fw);
1099 adev->gfx.mec_fw = NULL;
1100 release_firmware(adev->gfx.mec2_fw);
1101 adev->gfx.mec2_fw = NULL;
/* gfx_v8_0_get_csb_buffer - serialize the clear-state buffer (CSB).
 *
 * Writes, in little-endian dwords, the PM4 stream the RLC replays to
 * reset GPU context state: PREAMBLE begin, a CONTEXT_CONTROL packet,
 * one SET_CONTEXT_REG packet per SECT_CONTEXT extent from
 * adev->gfx.rlc.cs_data, the PA_SC_RASTER_CONFIG pair, PREAMBLE end,
 * and a final CLEAR_STATE packet.
 */
1106 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1107 volatile u32 *buffer)
1110 const struct cs_section_def *sect = NULL;
1111 const struct cs_extent_def *ext = NULL;
/* Nothing to serialize without clear-state section data. */
1113 if (adev->gfx.rlc.cs_data == NULL)
1118 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1119 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1121 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1122 buffer[count++] = cpu_to_le32(0x80000000);
1123 buffer[count++] = cpu_to_le32(0x80000000);
/* Emit one SET_CONTEXT_REG packet per context-section extent. */
1125 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1126 for (ext = sect->section; ext->extent != NULL; ++ext) {
1127 if (sect->id == SECT_CONTEXT) {
1129 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1130 buffer[count++] = cpu_to_le32(ext->reg_index -
1131 PACKET3_SET_CONTEXT_REG_START);
1132 for (i = 0; i < ext->reg_count; i++)
1133 buffer[count++] = cpu_to_le32(ext->extent[i]);
/* Raster config pair for SE0/SH0, captured at init time. */
1140 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1141 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1142 PACKET3_SET_CONTEXT_REG_START);
1143 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1144 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1146 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1147 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1149 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1150 buffer[count++] = cpu_to_le32(0);
/* cz_init_cp_jump_table - copy the CP jump tables into the RLC buffer.
 *
 * For each micro engine (me 0..max_me-1: CE, PFP, ME, MEC, and - when
 * present - MEC2) this locates the jump table inside the corresponding
 * firmware image (via jt_offset/jt_size in the gfx firmware header) and
 * copies it, packed back-to-back, into the mapped cp_table buffer.
 *
 * NOTE(review): max_me starts at 4; the CHIP_CARRIZO check presumably
 * raises it to 5 so the me == 4 (MEC2) branch is reached - the line
 * following the check is not visible here, confirm against full source.
 */
1153 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1155 const __le32 *fw_data;
1156 volatile u32 *dst_ptr;
1157 int me, i, max_me = 4;
1159 u32 table_offset, table_size;
1161 if (adev->asic_type == CHIP_CARRIZO)
1164 /* write the cp table buffer */
1165 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1166 for (me = 0; me < max_me; me++) {
/* me == 0: constant engine firmware. */
1168 const struct gfx_firmware_header_v1_0 *hdr =
1169 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1170 fw_data = (const __le32 *)
1171 (adev->gfx.ce_fw->data +
1172 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1173 table_offset = le32_to_cpu(hdr->jt_offset);
1174 table_size = le32_to_cpu(hdr->jt_size);
1175 } else if (me == 1) {
/* me == 1: prefetch parser firmware. */
1176 const struct gfx_firmware_header_v1_0 *hdr =
1177 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1178 fw_data = (const __le32 *)
1179 (adev->gfx.pfp_fw->data +
1180 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1181 table_offset = le32_to_cpu(hdr->jt_offset);
1182 table_size = le32_to_cpu(hdr->jt_size);
1183 } else if (me == 2) {
/* me == 2: micro engine firmware. */
1184 const struct gfx_firmware_header_v1_0 *hdr =
1185 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1186 fw_data = (const __le32 *)
1187 (adev->gfx.me_fw->data +
1188 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1189 table_offset = le32_to_cpu(hdr->jt_offset);
1190 table_size = le32_to_cpu(hdr->jt_size);
1191 } else if (me == 3) {
/* me == 3: first compute micro engine firmware. */
1192 const struct gfx_firmware_header_v1_0 *hdr =
1193 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1194 fw_data = (const __le32 *)
1195 (adev->gfx.mec_fw->data +
1196 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1197 table_offset = le32_to_cpu(hdr->jt_offset);
1198 table_size = le32_to_cpu(hdr->jt_size);
1199 } else if (me == 4) {
/* me == 4: second compute micro engine firmware (when loaded). */
1200 const struct gfx_firmware_header_v1_0 *hdr =
1201 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1202 fw_data = (const __le32 *)
1203 (adev->gfx.mec2_fw->data +
1204 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1205 table_offset = le32_to_cpu(hdr->jt_offset);
1206 table_size = le32_to_cpu(hdr->jt_size);
/* Copy the jump table dwords into the shared cp table buffer. */
1209 for (i = 0; i < table_size; i ++) {
1210 dst_ptr[bo_offset + i] =
1211 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
/* Tables are packed back-to-back in the destination buffer. */
1214 bo_offset += table_size;
/* gfx_v8_0_rlc_fini - tear down the RLC buffer objects.
 *
 * Reserve/unpin/unreserve/unref the clear-state BO and (if allocated)
 * the CP jump-table BO, then NULL the pointers.  A failed reserve only
 * warns; teardown proceeds regardless.
 */
1218 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1222 /* clear state block */
1223 if (adev->gfx.rlc.clear_state_obj) {
1224 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1225 if (unlikely(r != 0))
1226 dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1227 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1228 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1229 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1230 adev->gfx.rlc.clear_state_obj = NULL;
1233 /* jump table block */
1234 if (adev->gfx.rlc.cp_table_obj) {
1235 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1236 if (unlikely(r != 0))
1237 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1238 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1239 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1240 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1241 adev->gfx.rlc.cp_table_obj = NULL;
/* gfx_v8_0_rlc_init - allocate and populate the RLC buffers.
 *
 * Creates a CPU-accessible, contiguous VRAM BO sized for the clear-state
 * buffer, pins and maps it, fills it via gfx_v8_0_get_csb_buffer(), then
 * unmaps/unreserves.  On Carrizo and Stoney it additionally allocates a
 * cp_table BO (jump tables + GDS backup area) and fills it through
 * cz_init_cp_jump_table().  Failures warn and unwind via
 * gfx_v8_0_rlc_fini().
 */
1245 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1247 volatile u32 *dst_ptr;
1249 const struct cs_section_def *cs_data;
1252 adev->gfx.rlc.cs_data = vi_cs_data;
1254 cs_data = adev->gfx.rlc.cs_data;
1257 /* clear state block */
1258 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1260 if (adev->gfx.rlc.clear_state_obj == NULL) {
/* CPU-accessible + contiguous so the RLC can fetch it directly. */
1261 r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1262 AMDGPU_GEM_DOMAIN_VRAM,
1263 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1264 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1266 &adev->gfx.rlc.clear_state_obj);
1268 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1269 gfx_v8_0_rlc_fini(adev);
1273 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1274 if (unlikely(r != 0)) {
1275 gfx_v8_0_rlc_fini(adev);
1278 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1279 &adev->gfx.rlc.clear_state_gpu_addr);
1281 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1282 dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1283 gfx_v8_0_rlc_fini(adev);
1287 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1289 dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1290 gfx_v8_0_rlc_fini(adev);
1293 /* set up the cs buffer */
1294 dst_ptr = adev->gfx.rlc.cs_ptr;
1295 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1296 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1297 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
/* Carrizo/Stoney also need the CP jump-table buffer. */
1300 if ((adev->asic_type == CHIP_CARRIZO) ||
1301 (adev->asic_type == CHIP_STONEY)) {
1302 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1303 if (adev->gfx.rlc.cp_table_obj == NULL) {
1304 r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1305 AMDGPU_GEM_DOMAIN_VRAM,
1306 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1307 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1309 &adev->gfx.rlc.cp_table_obj);
1311 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1316 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1317 if (unlikely(r != 0)) {
1318 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1321 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1322 &adev->gfx.rlc.cp_table_gpu_addr);
1324 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1325 dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1328 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1330 dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1334 cz_init_cp_jump_table(adev);
1336 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1337 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
/* gfx_v8_0_mec_fini - release the MEC HPD EOP buffer object.
 *
 * Reserve/unpin/unreserve/unref the hpd_eop BO allocated by
 * gfx_v8_0_mec_init() and NULL the pointer; a failed reserve only warns.
 */
1343 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1347 if (adev->gfx.mec.hpd_eop_obj) {
1348 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1349 if (unlikely(r != 0))
1350 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1351 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1352 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1353 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1354 adev->gfx.mec.hpd_eop_obj = NULL;
/* Per-queue hardware pipe descriptor (HPD) EOP buffer size in bytes. */
1358 #define MEC_HPD_SIZE 2048

/* gfx_v8_0_mec_init - set up the compute micro engine EOP buffer.
 *
 * Uses 1 MEC with 1 pipe (8 queues), allocates a GTT BO of
 * num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes for the HPD EOP area,
 * pins and maps it, zeroes it, then unmaps/unreserves.  Failures warn
 * and unwind through gfx_v8_0_mec_fini().
 */
1360 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1366 * we assign only 1 pipe because all other pipes will
1369 adev->gfx.mec.num_mec = 1;
1370 adev->gfx.mec.num_pipe = 1;
1371 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1373 if (adev->gfx.mec.hpd_eop_obj == NULL) {
1374 r = amdgpu_bo_create(adev,
1375 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1377 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1378 &adev->gfx.mec.hpd_eop_obj);
1380 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1385 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1386 if (unlikely(r != 0)) {
1387 gfx_v8_0_mec_fini(adev);
1390 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1391 &adev->gfx.mec.hpd_eop_gpu_addr);
1393 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1394 gfx_v8_0_mec_fini(adev);
1397 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1399 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1400 gfx_v8_0_mec_fini(adev);
/* Start from a clean EOP area. */
1404 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1406 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1407 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/* Raw GFX8 shader machine code dispatched by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize VGPRs; ends with
 * s_barrier (0xbf8a0000) and s_endpgm (0xbf810000). */
1412 static const u32 vgpr_init_compute_shader[] =
1414 0x7e000209, 0x7e020208,
1415 0x7e040207, 0x7e060206,
1416 0x7e080205, 0x7e0a0204,
1417 0x7e0c0203, 0x7e0e0202,
1418 0x7e100201, 0x7e120200,
1419 0x7e140209, 0x7e160208,
1420 0x7e180207, 0x7e1a0206,
1421 0x7e1c0205, 0x7e1e0204,
1422 0x7e200203, 0x7e220202,
1423 0x7e240201, 0x7e260200,
1424 0x7e280209, 0x7e2a0208,
1425 0x7e2c0207, 0x7e2e0206,
1426 0x7e300205, 0x7e320204,
1427 0x7e340203, 0x7e360202,
1428 0x7e380201, 0x7e3a0200,
1429 0x7e3c0209, 0x7e3e0208,
1430 0x7e400207, 0x7e420206,
1431 0x7e440205, 0x7e460204,
1432 0x7e480203, 0x7e4a0202,
1433 0x7e4c0201, 0x7e4e0200,
1434 0x7e500209, 0x7e520208,
1435 0x7e540207, 0x7e560206,
1436 0x7e580205, 0x7e5a0204,
1437 0x7e5c0203, 0x7e5e0202,
1438 0x7e600201, 0x7e620200,
1439 0x7e640209, 0x7e660208,
1440 0x7e680207, 0x7e6a0206,
1441 0x7e6c0205, 0x7e6e0204,
1442 0x7e700203, 0x7e720202,
1443 0x7e740201, 0x7e760200,
1444 0x7e780209, 0x7e7a0208,
1445 0x7e7c0207, 0x7e7e0206,
1446 0xbf8a0000, 0xbf810000,
/* Raw GFX8 shader machine code dispatched (twice, with different
 * static thread management masks) by gfx_v8_0_do_edc_gpr_workarounds()
 * to initialize SGPRs. */
1449 static const u32 sgpr_init_compute_shader[] =
1451 0xbe8a0100, 0xbe8c0102,
1452 0xbe8e0104, 0xbe900106,
1453 0xbe920108, 0xbe940100,
1454 0xbe960102, 0xbe980104,
1455 0xbe9a0106, 0xbe9c0108,
1456 0xbe9e0100, 0xbea00102,
1457 0xbea20104, 0xbea40106,
1458 0xbea60108, 0xbea80100,
1459 0xbeaa0102, 0xbeac0104,
1460 0xbeae0106, 0xbeb00108,
1461 0xbeb20100, 0xbeb40102,
1462 0xbeb60104, 0xbeb80106,
1463 0xbeba0108, 0xbebc0100,
1464 0xbebe0102, 0xbec00104,
1465 0xbec20106, 0xbec40108,
1466 0xbec60100, 0xbec80102,
1467 0xbee60004, 0xbee70005,
1468 0xbeea0006, 0xbeeb0007,
1469 0xbee80008, 0xbee90009,
1470 0xbefc0000, 0xbf8a0000,
1471 0xbf810000, 0x00000000,
/* Register/value pairs (consumed two at a time via SET_SH_REG packets
 * in gfx_v8_0_do_edc_gpr_workarounds) configuring the VGPR-init
 * compute dispatch: thread counts, resource limits, and user data. */
1474 static const u32 vgpr_init_regs[] =
1476 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1477 mmCOMPUTE_RESOURCE_LIMITS, 0,
1478 mmCOMPUTE_NUM_THREAD_X, 256*4,
1479 mmCOMPUTE_NUM_THREAD_Y, 1,
1480 mmCOMPUTE_NUM_THREAD_Z, 1,
1481 mmCOMPUTE_PGM_RSRC2, 20,
1482 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1483 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1484 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1485 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1486 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1487 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1488 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1489 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1490 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1491 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/* Register/value pairs for the first SGPR-init dispatch; the 0x0f
 * static thread management mask targets the lower CUs (the second
 * dispatch uses 0xf0 for the upper half). */
1494 static const u32 sgpr1_init_regs[] =
1496 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1497 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1498 mmCOMPUTE_NUM_THREAD_X, 256*5,
1499 mmCOMPUTE_NUM_THREAD_Y, 1,
1500 mmCOMPUTE_NUM_THREAD_Z, 1,
1501 mmCOMPUTE_PGM_RSRC2, 20,
1502 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1503 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1504 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1505 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1506 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1507 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1508 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1509 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1510 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1511 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/* Register/value pairs for the second SGPR-init dispatch; identical to
 * sgpr1_init_regs except the 0xf0 thread management mask covers the
 * complementary set of CUs. */
1514 static const u32 sgpr2_init_regs[] =
1516 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1517 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1518 mmCOMPUTE_NUM_THREAD_X, 256*5,
1519 mmCOMPUTE_NUM_THREAD_Y, 1,
1520 mmCOMPUTE_NUM_THREAD_Z, 1,
1521 mmCOMPUTE_PGM_RSRC2, 20,
1522 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1523 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1524 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1525 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1526 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1527 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1528 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1529 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1530 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1531 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/* EDC SEC/DED error-counter registers read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters. */
1534 static const u32 sec_ded_counter_registers[] =
1537 mmCPC_EDC_SCRATCH_CNT,
1538 mmCPC_EDC_UCODE_CNT,
1545 mmDC_EDC_CSINVOC_CNT,
1546 mmDC_EDC_RESTORE_CNT,
1552 mmSQC_ATC_EDC_GATCL1_CNT,
1558 mmTCP_ATC_EDC_GATCL1_CNT,
/* gfx_v8_0_do_edc_gpr_workarounds - initialize GPRs for EDC (Carrizo).
 *
 * ECC/EDC requires all VGPRs and SGPRs to hold known values before
 * error detection is enabled.  This builds one IB containing three
 * compute dispatches - the VGPR-init shader, then the SGPR-init shader
 * twice with complementary CU masks (sgpr1/sgpr2 reg tables) - each
 * followed by a CS-partial-flush event, runs it on compute ring 0, and
 * waits for the fence.  Afterwards GB_EDC_MODE / CC_GC_EDC_CONFIG are
 * programmed and every SEC/DED counter register is read to clear it.
 */
1563 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1565 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1566 struct amdgpu_ib ib;
1567 struct fence *f = NULL;
1570 unsigned total_size, vgpr_offset, sgpr_offset;
1573 /* only supported on CZ */
1574 if (adev->asic_type != CHIP_CARRIZO)
1577 /* bail if the compute ring is not ready */
/* Save the current EDC mode and disable it while we initialize GPRs. */
1581 tmp = RREG32(mmGB_EDC_MODE);
1582 WREG32(mmGB_EDC_MODE, 0);
/* Size the IB: 3 dwords per reg pair, plus PGM address (4), dispatch
 * (5) and event-write (2) packets, for each of the three dispatches;
 * the shader code is appended after the packets, 256-byte aligned. */
1585 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1587 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1589 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1590 total_size = ALIGN(total_size, 256);
1591 vgpr_offset = total_size;
1592 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1593 sgpr_offset = total_size;
1594 total_size += sizeof(sgpr_init_compute_shader);
1596 /* allocate an indirect buffer to put the commands in */
1597 memset(&ib, 0, sizeof(ib));
1598 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1600 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1604 /* load the compute shaders */
1605 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1606 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1608 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1609 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1611 /* init the ib length to 0 */
/* --- dispatch 1: VGPR init --- */
1615 /* write the register state for the compute dispatch */
1616 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1617 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1618 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1619 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1621 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1622 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1623 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1624 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1625 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1626 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1628 /* write dispatch packet */
1629 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1630 ib.ptr[ib.length_dw++] = 8; /* x */
1631 ib.ptr[ib.length_dw++] = 1; /* y */
1632 ib.ptr[ib.length_dw++] = 1; /* z */
1633 ib.ptr[ib.length_dw++] =
1634 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1636 /* write CS partial flush packet */
1637 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1638 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* --- dispatch 2: SGPR init, first half of CUs --- */
1641 /* write the register state for the compute dispatch */
1642 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1643 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1644 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1645 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1647 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1648 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1649 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1650 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1651 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1652 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1654 /* write dispatch packet */
1655 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1656 ib.ptr[ib.length_dw++] = 8; /* x */
1657 ib.ptr[ib.length_dw++] = 1; /* y */
1658 ib.ptr[ib.length_dw++] = 1; /* z */
1659 ib.ptr[ib.length_dw++] =
1660 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1662 /* write CS partial flush packet */
1663 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1664 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* --- dispatch 3: SGPR init, second half of CUs --- */
1667 /* write the register state for the compute dispatch */
1668 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1669 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1670 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1671 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1673 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1674 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1675 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1676 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1677 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1678 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1680 /* write dispatch packet */
1681 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1682 ib.ptr[ib.length_dw++] = 8; /* x */
1683 ib.ptr[ib.length_dw++] = 1; /* y */
1684 ib.ptr[ib.length_dw++] = 1; /* z */
1685 ib.ptr[ib.length_dw++] =
1686 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1688 /* write CS partial flush packet */
1689 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1690 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1692 /* schedule the ib on the ring */
1693 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1695 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1699 /* wait for the GPU to finish processing the IB */
1700 r = fence_wait(f, false);
1702 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
/* Re-enable EDC now that all GPRs hold known values. */
1706 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1707 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1708 WREG32(mmGB_EDC_MODE, tmp);
1710 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1711 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1712 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1715 /* read back registers to clear the counters */
1716 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1717 RREG32(sec_ded_counter_registers[i]);
1720 amdgpu_ib_free(adev, &ib, NULL);
1726 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1729 u32 mc_shared_chmap, mc_arb_ramcfg;
1730 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1734 switch (adev->asic_type) {
1736 adev->gfx.config.max_shader_engines = 1;
1737 adev->gfx.config.max_tile_pipes = 2;
1738 adev->gfx.config.max_cu_per_sh = 6;
1739 adev->gfx.config.max_sh_per_se = 1;
1740 adev->gfx.config.max_backends_per_se = 2;
1741 adev->gfx.config.max_texture_channel_caches = 2;
1742 adev->gfx.config.max_gprs = 256;
1743 adev->gfx.config.max_gs_threads = 32;
1744 adev->gfx.config.max_hw_contexts = 8;
1746 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1747 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1748 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1749 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1750 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1753 adev->gfx.config.max_shader_engines = 4;
1754 adev->gfx.config.max_tile_pipes = 16;
1755 adev->gfx.config.max_cu_per_sh = 16;
1756 adev->gfx.config.max_sh_per_se = 1;
1757 adev->gfx.config.max_backends_per_se = 4;
1758 adev->gfx.config.max_texture_channel_caches = 16;
1759 adev->gfx.config.max_gprs = 256;
1760 adev->gfx.config.max_gs_threads = 32;
1761 adev->gfx.config.max_hw_contexts = 8;
1763 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1764 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1765 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1766 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1767 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1769 case CHIP_POLARIS11:
1770 ret = amdgpu_atombios_get_gfx_info(adev);
1773 adev->gfx.config.max_gprs = 256;
1774 adev->gfx.config.max_gs_threads = 32;
1775 adev->gfx.config.max_hw_contexts = 8;
1777 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1778 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1779 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1780 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1781 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1783 case CHIP_POLARIS10:
1784 ret = amdgpu_atombios_get_gfx_info(adev);
1787 adev->gfx.config.max_gprs = 256;
1788 adev->gfx.config.max_gs_threads = 32;
1789 adev->gfx.config.max_hw_contexts = 8;
1791 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1792 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1793 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1794 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1795 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1798 adev->gfx.config.max_shader_engines = 4;
1799 adev->gfx.config.max_tile_pipes = 8;
1800 adev->gfx.config.max_cu_per_sh = 8;
1801 adev->gfx.config.max_sh_per_se = 1;
1802 adev->gfx.config.max_backends_per_se = 2;
1803 adev->gfx.config.max_texture_channel_caches = 8;
1804 adev->gfx.config.max_gprs = 256;
1805 adev->gfx.config.max_gs_threads = 32;
1806 adev->gfx.config.max_hw_contexts = 8;
1808 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1809 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1810 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1811 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1812 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1815 adev->gfx.config.max_shader_engines = 1;
1816 adev->gfx.config.max_tile_pipes = 2;
1817 adev->gfx.config.max_sh_per_se = 1;
1818 adev->gfx.config.max_backends_per_se = 2;
1820 switch (adev->pdev->revision) {
1828 adev->gfx.config.max_cu_per_sh = 8;
1838 adev->gfx.config.max_cu_per_sh = 6;
1845 adev->gfx.config.max_cu_per_sh = 6;
1854 adev->gfx.config.max_cu_per_sh = 4;
1858 adev->gfx.config.max_texture_channel_caches = 2;
1859 adev->gfx.config.max_gprs = 256;
1860 adev->gfx.config.max_gs_threads = 32;
1861 adev->gfx.config.max_hw_contexts = 8;
1863 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1864 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1865 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1866 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1867 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1870 adev->gfx.config.max_shader_engines = 1;
1871 adev->gfx.config.max_tile_pipes = 2;
1872 adev->gfx.config.max_sh_per_se = 1;
1873 adev->gfx.config.max_backends_per_se = 1;
1875 switch (adev->pdev->revision) {
1882 adev->gfx.config.max_cu_per_sh = 3;
1888 adev->gfx.config.max_cu_per_sh = 2;
1892 adev->gfx.config.max_texture_channel_caches = 2;
1893 adev->gfx.config.max_gprs = 256;
1894 adev->gfx.config.max_gs_threads = 16;
1895 adev->gfx.config.max_hw_contexts = 8;
1897 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1898 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1899 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1900 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1901 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1904 adev->gfx.config.max_shader_engines = 2;
1905 adev->gfx.config.max_tile_pipes = 4;
1906 adev->gfx.config.max_cu_per_sh = 2;
1907 adev->gfx.config.max_sh_per_se = 1;
1908 adev->gfx.config.max_backends_per_se = 2;
1909 adev->gfx.config.max_texture_channel_caches = 4;
1910 adev->gfx.config.max_gprs = 256;
1911 adev->gfx.config.max_gs_threads = 32;
1912 adev->gfx.config.max_hw_contexts = 8;
1914 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1915 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1916 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1917 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1918 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1922 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1923 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1924 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1926 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1927 adev->gfx.config.mem_max_burst_length_bytes = 256;
1928 if (adev->flags & AMD_IS_APU) {
1929 /* Get memory bank mapping mode. */
1930 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1931 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1932 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1934 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1935 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1936 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1938 /* Validate settings in case only one DIMM installed. */
1939 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1940 dimm00_addr_map = 0;
1941 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1942 dimm01_addr_map = 0;
1943 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1944 dimm10_addr_map = 0;
1945 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1946 dimm11_addr_map = 0;
1948 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1949 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1950 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1951 adev->gfx.config.mem_row_size_in_kb = 2;
1953 adev->gfx.config.mem_row_size_in_kb = 1;
1955 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1956 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1957 if (adev->gfx.config.mem_row_size_in_kb > 4)
1958 adev->gfx.config.mem_row_size_in_kb = 4;
1961 adev->gfx.config.shader_engine_tile_size = 32;
1962 adev->gfx.config.num_gpus = 1;
1963 adev->gfx.config.multi_gpu_tile_size = 64;
1965 /* fix up row size */
1966 switch (adev->gfx.config.mem_row_size_in_kb) {
1969 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1972 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1975 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1978 adev->gfx.config.gb_addr_config = gb_addr_config;
1983 static int gfx_v8_0_sw_init(void *handle)
1986 struct amdgpu_ring *ring;
1987 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1990 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1994 /* Privileged reg */
1995 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1999 /* Privileged inst */
2000 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2004 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2006 gfx_v8_0_scratch_init(adev);
2008 r = gfx_v8_0_init_microcode(adev);
2010 DRM_ERROR("Failed to load gfx firmware!\n");
2014 r = gfx_v8_0_rlc_init(adev);
2016 DRM_ERROR("Failed to init rlc BOs!\n");
2020 r = gfx_v8_0_mec_init(adev);
2022 DRM_ERROR("Failed to init MEC BOs!\n");
2026 /* set up the gfx ring */
2027 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2028 ring = &adev->gfx.gfx_ring[i];
2029 ring->ring_obj = NULL;
2030 sprintf(ring->name, "gfx");
2031 /* no gfx doorbells on iceland */
2032 if (adev->asic_type != CHIP_TOPAZ) {
2033 ring->use_doorbell = true;
2034 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2037 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2038 AMDGPU_CP_IRQ_GFX_EOP);
2043 /* set up the compute queues */
2044 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2047 /* max 32 queues per MEC */
2048 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2049 DRM_ERROR("Too many (%d) compute rings!\n", i);
2052 ring = &adev->gfx.compute_ring[i];
2053 ring->ring_obj = NULL;
2054 ring->use_doorbell = true;
2055 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2056 ring->me = 1; /* first MEC */
2058 ring->queue = i % 8;
2059 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2060 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2061 /* type-2 packets are deprecated on MEC, use type-3 instead */
2062 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2068 /* reserve GDS, GWS and OA resource for gfx */
2069 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2070 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2071 &adev->gds.gds_gfx_bo, NULL, NULL);
2075 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2076 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2077 &adev->gds.gws_gfx_bo, NULL, NULL);
2081 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2082 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2083 &adev->gds.oa_gfx_bo, NULL, NULL);
2087 adev->gfx.ce_ram_size = 0x8000;
2089 r = gfx_v8_0_gpu_early_init(adev);
2096 static int gfx_v8_0_sw_fini(void *handle)
2099 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2101 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2102 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2103 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2105 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2106 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2107 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2108 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2110 gfx_v8_0_mec_fini(adev);
2111 gfx_v8_0_rlc_fini(adev);
2112 gfx_v8_0_free_microcode(adev);
2117 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2119 uint32_t *modearray, *mod2array;
2120 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2121 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2124 modearray = adev->gfx.config.tile_mode_array;
2125 mod2array = adev->gfx.config.macrotile_mode_array;
2127 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2128 modearray[reg_offset] = 0;
2130 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2131 mod2array[reg_offset] = 0;
2133 switch (adev->asic_type) {
2135 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2136 PIPE_CONFIG(ADDR_SURF_P2) |
2137 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2138 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2139 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2140 PIPE_CONFIG(ADDR_SURF_P2) |
2141 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2142 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2143 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2144 PIPE_CONFIG(ADDR_SURF_P2) |
2145 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2146 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2147 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2148 PIPE_CONFIG(ADDR_SURF_P2) |
2149 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2150 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2151 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2152 PIPE_CONFIG(ADDR_SURF_P2) |
2153 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2154 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2155 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2156 PIPE_CONFIG(ADDR_SURF_P2) |
2157 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2158 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2159 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2160 PIPE_CONFIG(ADDR_SURF_P2) |
2161 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2162 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2163 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2164 PIPE_CONFIG(ADDR_SURF_P2));
2165 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2166 PIPE_CONFIG(ADDR_SURF_P2) |
2167 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2168 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2169 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2170 PIPE_CONFIG(ADDR_SURF_P2) |
2171 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2172 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2173 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2174 PIPE_CONFIG(ADDR_SURF_P2) |
2175 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2176 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2177 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2178 PIPE_CONFIG(ADDR_SURF_P2) |
2179 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2180 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2181 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2182 PIPE_CONFIG(ADDR_SURF_P2) |
2183 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2184 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2185 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2186 PIPE_CONFIG(ADDR_SURF_P2) |
2187 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2188 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2189 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2190 PIPE_CONFIG(ADDR_SURF_P2) |
2191 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2192 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2193 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2194 PIPE_CONFIG(ADDR_SURF_P2) |
2195 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2197 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2198 PIPE_CONFIG(ADDR_SURF_P2) |
2199 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2200 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2201 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2202 PIPE_CONFIG(ADDR_SURF_P2) |
2203 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2204 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2205 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2206 PIPE_CONFIG(ADDR_SURF_P2) |
2207 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2209 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2210 PIPE_CONFIG(ADDR_SURF_P2) |
2211 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2212 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2213 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2214 PIPE_CONFIG(ADDR_SURF_P2) |
2215 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2216 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2217 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2218 PIPE_CONFIG(ADDR_SURF_P2) |
2219 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2220 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2221 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2222 PIPE_CONFIG(ADDR_SURF_P2) |
2223 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2224 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2225 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2226 PIPE_CONFIG(ADDR_SURF_P2) |
2227 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2228 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2229 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2230 PIPE_CONFIG(ADDR_SURF_P2) |
2231 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2232 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2233 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2234 PIPE_CONFIG(ADDR_SURF_P2) |
2235 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2236 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2238 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2239 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2240 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2241 NUM_BANKS(ADDR_SURF_8_BANK));
2242 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2245 NUM_BANKS(ADDR_SURF_8_BANK));
2246 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2247 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2248 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2249 NUM_BANKS(ADDR_SURF_8_BANK));
2250 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2251 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2252 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2253 NUM_BANKS(ADDR_SURF_8_BANK));
2254 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257 NUM_BANKS(ADDR_SURF_8_BANK));
2258 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2260 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2261 NUM_BANKS(ADDR_SURF_8_BANK));
2262 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2263 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2264 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2265 NUM_BANKS(ADDR_SURF_8_BANK));
2266 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2267 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2268 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2269 NUM_BANKS(ADDR_SURF_16_BANK));
2270 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2271 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2272 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2273 NUM_BANKS(ADDR_SURF_16_BANK));
2274 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2275 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2276 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2277 NUM_BANKS(ADDR_SURF_16_BANK));
2278 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2279 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2280 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2281 NUM_BANKS(ADDR_SURF_16_BANK));
2282 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2283 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2284 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2285 NUM_BANKS(ADDR_SURF_16_BANK));
2286 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2287 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2288 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2289 NUM_BANKS(ADDR_SURF_16_BANK));
2290 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2291 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2292 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2293 NUM_BANKS(ADDR_SURF_8_BANK));
2295 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2296 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2298 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2300 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2301 if (reg_offset != 7)
2302 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2306 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2309 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2311 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2313 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2314 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2317 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2318 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2321 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2325 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2326 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2327 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2329 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2330 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2331 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2333 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2334 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2335 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2336 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2338 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2339 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2340 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2341 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2345 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2347 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2349 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2351 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2352 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2353 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2354 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2355 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2356 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2357 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2359 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2360 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2361 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2364 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2365 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2367 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2368 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2369 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2371 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2372 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2373 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2374 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2375 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2376 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2377 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2381 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2385 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2387 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2389 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2391 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2393 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2397 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2398 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2399 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2400 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2401 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2403 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2404 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2405 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2407 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2408 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2409 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2411 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2412 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2415 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2419 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2421 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2423 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2424 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2426 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2427 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2429 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2431 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2432 NUM_BANKS(ADDR_SURF_8_BANK));
2433 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2435 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2436 NUM_BANKS(ADDR_SURF_8_BANK));
2437 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2439 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2440 NUM_BANKS(ADDR_SURF_8_BANK));
2441 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2443 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444 NUM_BANKS(ADDR_SURF_8_BANK));
2445 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2447 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2448 NUM_BANKS(ADDR_SURF_8_BANK));
2449 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2451 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452 NUM_BANKS(ADDR_SURF_8_BANK));
2453 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456 NUM_BANKS(ADDR_SURF_8_BANK));
2457 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2460 NUM_BANKS(ADDR_SURF_8_BANK));
2461 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2463 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2464 NUM_BANKS(ADDR_SURF_8_BANK));
2465 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2467 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468 NUM_BANKS(ADDR_SURF_8_BANK));
2469 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472 NUM_BANKS(ADDR_SURF_8_BANK));
2473 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2475 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476 NUM_BANKS(ADDR_SURF_8_BANK));
2477 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2479 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480 NUM_BANKS(ADDR_SURF_8_BANK));
2481 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484 NUM_BANKS(ADDR_SURF_4_BANK));
2486 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2487 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2489 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2490 if (reg_offset != 7)
2491 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2495 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2498 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2502 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2503 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2506 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2507 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2508 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2510 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2514 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2515 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2516 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2518 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2519 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2520 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2522 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2524 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2525 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2526 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2527 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2528 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2529 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2532 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2536 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2537 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2538 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2540 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2541 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2543 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2544 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2545 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2546 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2548 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2549 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2552 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2554 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2556 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2558 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2560 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2561 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2562 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2563 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2564 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2565 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2566 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2570 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2574 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2576 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2578 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2580 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2582 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/*
		 * Tail of one per-ASIC tiling case (its case label is above this
		 * excerpt; pipe configs here are P8_32x32_16x16 with a P4_16x16
		 * PRT variant).  Entries 23-30 finish the GB_TILE_MODE[] image in
		 * modearray[]: thick/xthick 3D modes and the rotated thin modes.
		 */
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE[] image (bank width/height, macro-tile
		 * aspect, bank count).  Entry 7 is intentionally never set and
		 * the write loop below skips it.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Commit both tables to the GB_TILE_MODE*/GB_MACROTILE_MODE* registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Index 7 was never initialized above - leave that register untouched. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
	/*
	 * Polaris11: 4-pipe part (ADDR_SURF_P4_16x16 for every tile mode).
	 * modearray[] is the GB_TILE_MODE[] image: depth modes with explicit
	 * tile splits (0-8), display modes (9-12), thin color modes (13-17),
	 * thick/xthick modes (18-26) and rotated modes (27-30).
	 */
	case CHIP_POLARIS11:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE[] image (bank geometry per macrotile mode).
		 * Entry 7 is deliberately left unset and skipped by the write
		 * loop below.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Commit both tables to the hardware. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Skip index 7: mod2array[7] was never initialized above. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
	/*
	 * Polaris10: 8-pipe part (ADDR_SURF_P8_32x32_16x16), with P4_16x16
	 * used for a handful of PRT modes (7, 12, 17, 23, 30).  Layout of
	 * modearray[] matches the other cases: depth (0-8), display (9-12),
	 * thin color (13-17), thick/xthick (18-26), rotated (27-30).
	 */
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE[] image.  Entry 7 is deliberately left
		 * unset and skipped by the write loop below.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Commit both tables to the hardware. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Skip index 7: mod2array[7] was never initialized above. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3088 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3089 PIPE_CONFIG(ADDR_SURF_P2) |
3090 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3091 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3092 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3093 PIPE_CONFIG(ADDR_SURF_P2) |
3094 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3095 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3096 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3097 PIPE_CONFIG(ADDR_SURF_P2) |
3098 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3099 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3100 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3101 PIPE_CONFIG(ADDR_SURF_P2) |
3102 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3103 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3104 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3105 PIPE_CONFIG(ADDR_SURF_P2) |
3106 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3107 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3108 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3109 PIPE_CONFIG(ADDR_SURF_P2) |
3110 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3111 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3112 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3113 PIPE_CONFIG(ADDR_SURF_P2) |
3114 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3115 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3116 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3117 PIPE_CONFIG(ADDR_SURF_P2));
3118 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3119 PIPE_CONFIG(ADDR_SURF_P2) |
3120 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3121 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3122 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3123 PIPE_CONFIG(ADDR_SURF_P2) |
3124 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3125 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3126 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3127 PIPE_CONFIG(ADDR_SURF_P2) |
3128 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3129 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3130 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3131 PIPE_CONFIG(ADDR_SURF_P2) |
3132 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3133 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3134 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3135 PIPE_CONFIG(ADDR_SURF_P2) |
3136 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3137 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3138 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3139 PIPE_CONFIG(ADDR_SURF_P2) |
3140 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3141 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3142 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3143 PIPE_CONFIG(ADDR_SURF_P2) |
3144 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3145 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3146 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3147 PIPE_CONFIG(ADDR_SURF_P2) |
3148 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3149 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3150 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3151 PIPE_CONFIG(ADDR_SURF_P2) |
3152 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3153 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3154 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3155 PIPE_CONFIG(ADDR_SURF_P2) |
3156 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3157 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3158 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3159 PIPE_CONFIG(ADDR_SURF_P2) |
3160 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3161 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3162 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3163 PIPE_CONFIG(ADDR_SURF_P2) |
3164 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3165 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3166 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3167 PIPE_CONFIG(ADDR_SURF_P2) |
3168 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3169 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3170 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3171 PIPE_CONFIG(ADDR_SURF_P2) |
3172 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3173 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3174 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3175 PIPE_CONFIG(ADDR_SURF_P2) |
3176 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3177 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3178 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3179 PIPE_CONFIG(ADDR_SURF_P2) |
3180 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3181 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3182 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3183 PIPE_CONFIG(ADDR_SURF_P2) |
3184 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3185 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3186 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3187 PIPE_CONFIG(ADDR_SURF_P2) |
3188 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3189 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3191 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3192 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3193 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3194 NUM_BANKS(ADDR_SURF_8_BANK));
3195 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3196 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3197 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3198 NUM_BANKS(ADDR_SURF_8_BANK));
3199 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3200 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3201 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3202 NUM_BANKS(ADDR_SURF_8_BANK));
3203 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3204 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3205 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3206 NUM_BANKS(ADDR_SURF_8_BANK));
3207 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3208 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3209 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3210 NUM_BANKS(ADDR_SURF_8_BANK));
3211 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3212 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3213 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3214 NUM_BANKS(ADDR_SURF_8_BANK));
3215 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3216 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3217 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3218 NUM_BANKS(ADDR_SURF_8_BANK));
3219 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3220 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3221 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3222 NUM_BANKS(ADDR_SURF_16_BANK));
3223 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3224 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3225 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3226 NUM_BANKS(ADDR_SURF_16_BANK));
3227 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3228 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3229 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3230 NUM_BANKS(ADDR_SURF_16_BANK));
3231 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3232 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3233 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3234 NUM_BANKS(ADDR_SURF_16_BANK));
3235 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3236 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3237 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3238 NUM_BANKS(ADDR_SURF_16_BANK));
3239 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3240 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3241 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3242 NUM_BANKS(ADDR_SURF_16_BANK));
3243 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3244 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3245 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3246 NUM_BANKS(ADDR_SURF_8_BANK));
3248 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3249 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3251 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3253 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3254 if (reg_offset != 7)
3255 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3260 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
		/*
		 * Start of the CHIP_CARRIZO case (the case label and the
		 * preceding default-warning fallthrough sit just above this
		 * excerpt; its mod2array[] programming continues below it).
		 * 2-pipe (ADDR_SURF_P2) configuration; modearray[] entries 7,
		 * 12, 17 and 23 are intentionally never assigned here.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3367 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3370 NUM_BANKS(ADDR_SURF_8_BANK));
3371 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3374 NUM_BANKS(ADDR_SURF_8_BANK));
3375 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3378 NUM_BANKS(ADDR_SURF_8_BANK));
3379 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3382 NUM_BANKS(ADDR_SURF_8_BANK));
3383 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3386 NUM_BANKS(ADDR_SURF_8_BANK));
3387 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3390 NUM_BANKS(ADDR_SURF_8_BANK));
3391 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3394 NUM_BANKS(ADDR_SURF_8_BANK));
3395 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3396 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3397 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3398 NUM_BANKS(ADDR_SURF_16_BANK));
3399 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3400 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3401 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3402 NUM_BANKS(ADDR_SURF_16_BANK));
3403 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3406 NUM_BANKS(ADDR_SURF_16_BANK));
3407 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3408 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3409 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3410 NUM_BANKS(ADDR_SURF_16_BANK));
3411 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3412 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3413 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3414 NUM_BANKS(ADDR_SURF_16_BANK));
3415 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3416 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3417 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3418 NUM_BANKS(ADDR_SURF_16_BANK));
3419 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3420 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3421 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3422 NUM_BANKS(ADDR_SURF_8_BANK));
3424 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3425 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3427 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3429 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3430 if (reg_offset != 7)
3431 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3437 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3438 u32 se_num, u32 sh_num, u32 instance)
3442 if (instance == 0xffffffff)
3443 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3445 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3447 if (se_num == 0xffffffff)
3448 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3450 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3452 if (sh_num == 0xffffffff)
3453 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3455 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3457 WREG32(mmGRBM_GFX_INDEX, data);
3460 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3462 return (u32)((1ULL << bit_width) - 1);
3465 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3469 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3470 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3472 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3474 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3475 adev->gfx.config.max_sh_per_se);
3477 return (~data) & mask;
/*
 * gfx_v8_0_raster_config - OR the per-ASIC default PA_SC_RASTER_CONFIG /
 * PA_SC_RASTER_CONFIG_1 values into *rconf / *rconf1.
 *
 * NOTE(review): this listing is elided — several case labels, breaks and
 * the closing of the switch fall outside the visible lines; do not infer
 * fallthrough between the visible cases.
 */
3481 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3483 switch (adev->asic_type) {
/* large (8-RB) parts: full RB/PKR/SE mapping plus SE-pair config */
3485 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3486 RB_XSEL2(1) | PKR_MAP(2) |
3487 PKR_XSEL(1) | PKR_YSEL(1) |
3488 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3489 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3493 case CHIP_POLARIS10:
3494 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3495 SE_XSEL(1) | SE_YSEL(1);
3496 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
/* small parts only need the PKR0 RB map */
3501 *rconf |= RB_MAP_PKR0(2);
3504 case CHIP_POLARIS11:
3505 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3506 SE_XSEL(1) | SE_YSEL(1);
/* unknown ASIC: leave outputs untouched and log an error */
3514 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
/*
 * gfx_v8_0_write_harvested_raster_configs - adjust and write the raster
 * config per shader engine when some render backends are harvested
 * (disabled), so the remaining RBs are remapped sensibly.
 *
 * @raster_config/@raster_config_1: the default (unharvested) values
 * @rb_mask: bitmap of RBs that are actually present/active
 * @num_rb: number of physical RBs
 *
 * NOTE(review): elided listing — several `else`/closing-brace lines are
 * missing between the visible lines.
 */
3520 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3521 u32 raster_config, u32 raster_config_1,
3522 unsigned rb_mask, unsigned num_rb)
3524 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3525 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3526 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3527 unsigned rb_per_se = num_rb / num_se;
3528 unsigned se_mask[4];
/* per-SE slices of the global RB mask */
3531 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3532 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3533 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3534 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
/* sanity-check the topology this remapping logic assumes */
3536 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3537 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3538 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
/* if one whole SE pair is dead, repoint SE_PAIR_MAP at the live pair */
3540 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3541 (!se_mask[2] && !se_mask[3]))) {
3542 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3544 if (!se_mask[0] && !se_mask[1]) {
3546 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3549 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
/* per-SE: derive a raster_config with dead SE/PKR/RB slots remapped */
3553 for (se = 0; se < num_se; se++) {
3554 unsigned raster_config_se = raster_config;
3555 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3556 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3557 int idx = (se / 2) * 2;
/* one SE of this pair is dead: repoint SE_MAP at the live one */
3559 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3560 raster_config_se &= ~SE_MAP_MASK;
3562 if (!se_mask[idx]) {
3563 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3565 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
/* same idea at packer (PKR) granularity */
3569 pkr0_mask &= rb_mask;
3570 pkr1_mask &= rb_mask;
3571 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3572 raster_config_se &= ~PKR_MAP_MASK;
3575 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3577 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
/* and at individual-RB granularity within PKR0 ... */
3581 if (rb_per_se >= 2) {
3582 unsigned rb0_mask = 1 << (se * rb_per_se);
3583 unsigned rb1_mask = rb0_mask << 1;
3585 rb0_mask &= rb_mask;
3586 rb1_mask &= rb_mask;
3587 if (!rb0_mask || !rb1_mask) {
3588 raster_config_se &= ~RB_MAP_PKR0_MASK;
3592 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3595 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
/* ... and within PKR1 when present */
3599 if (rb_per_se > 2) {
3600 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3601 rb1_mask = rb0_mask << 1;
3602 rb0_mask &= rb_mask;
3603 rb1_mask &= rb_mask;
3604 if (!rb0_mask || !rb1_mask) {
3605 raster_config_se &= ~RB_MAP_PKR1_MASK;
3609 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3612 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
/* write the per-SE value with GRBM_GFX_INDEX targeting this SE */
3618 /* GRBM_GFX_INDEX has a different offset on VI */
3619 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3620 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3621 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
/* restore broadcast mode when done */
3624 /* GRBM_GFX_INDEX has a different offset on VI */
3625 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
/*
 * gfx_v8_0_setup_rb - discover active render backends, program the raster
 * config (harvest-aware), and cache per-SE/SH RB registers for userspace.
 *
 * NOTE(review): elided listing — some local declarations (i, j, data,
 * active_rbs) and closing braces are outside the visible lines.
 */
3628 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3632 u32 raster_config = 0, raster_config_1 = 0;
3634 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3635 adev->gfx.config.max_sh_per_se;
3636 unsigned num_rb_pipes;
/* walk every SE/SH and collect the active-RB bitmap */
3638 mutex_lock(&adev->grbm_idx_mutex);
3639 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3640 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3641 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3642 data = gfx_v8_0_get_rb_active_bitmap(adev);
3643 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3644 rb_bitmap_width_per_sh);
3647 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3649 adev->gfx.config.backend_enable_mask = active_rbs;
3650 adev->gfx.config.num_rbs = hweight32(active_rbs);
3652 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3653 adev->gfx.config.max_shader_engines, 16);
3655 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
/* no harvesting (or nothing enabled): write defaults broadcast;
 * otherwise remap around the harvested RBs */
3657 if (!adev->gfx.config.backend_enable_mask ||
3658 adev->gfx.config.num_rbs >= num_rb_pipes) {
3659 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3660 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3662 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3663 adev->gfx.config.backend_enable_mask,
3667 /* cache the values for userspace */
3668 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3669 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3670 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3671 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3672 RREG32(mmCC_RB_BACKEND_DISABLE);
3673 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3674 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3675 adev->gfx.config.rb_config[i][j].raster_config =
3676 RREG32(mmPA_SC_RASTER_CONFIG);
3677 adev->gfx.config.rb_config[i][j].raster_config_1 =
3678 RREG32(mmPA_SC_RASTER_CONFIG_1);
3681 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3682 mutex_unlock(&adev->grbm_idx_mutex);
3686 * gfx_v8_0_init_compute_vmid - initialize the compute VMID SH_MEM registers
3688 * @adev: amdgpu_device pointer
3690 * Initialize compute vmid sh_mem registers
3693 #define DEFAULT_SH_MEM_BASES (0x6000)
3694 #define FIRST_COMPUTE_VMID (8)
3695 #define LAST_COMPUTE_VMID (16)
3696 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3699 uint32_t sh_mem_config;
3700 uint32_t sh_mem_bases;
3703 * Configure apertures:
3704 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3705 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3706 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3708 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3710 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3711 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3712 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3713 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3714 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3715 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3717 mutex_lock(&adev->srbm_mutex);
3718 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3719 vi_srbm_select(adev, 0, 0, 0, i);
3720 /* CP and shaders */
3721 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3722 WREG32(mmSH_MEM_APE1_BASE, 1);
3723 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3724 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3726 vi_srbm_select(adev, 0, 0, 0, 0);
3727 mutex_unlock(&adev->srbm_mutex);
/*
 * gfx_v8_0_gpu_init - one-time GFX block setup: address config, tiling
 * tables, RB/raster config, CU info, SH_MEM apertures for all 16 VMIDs,
 * compute VMIDs, and the PA_SC FIFO sizes.
 *
 * NOTE(review): elided listing — the `u32 tmp; int i;` declarations and
 * the if/else selecting UC vs NC mtype per VMID are partially outside the
 * visible lines.
 */
3730 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3735 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
/* mirror the GB address config into HDP and DMIF */
3736 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3737 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3738 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3740 gfx_v8_0_tiling_mode_table_init(adev);
3741 gfx_v8_0_setup_rb(adev);
3742 gfx_v8_0_get_cu_info(adev);
3744 /* XXX SH_MEM regs */
3745 /* where to put LDS, scratch, GPUVM in FSA64 space */
3746 mutex_lock(&adev->srbm_mutex);
3747 for (i = 0; i < 16; i++) {
3748 vi_srbm_select(adev, 0, 0, 0, i);
3749 /* CP and shaders */
/* one branch programs MTYPE_UC, the other MTYPE_NC — the selecting
 * condition (per-VMID) is elided from this listing */
3751 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3752 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3753 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3754 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3755 WREG32(mmSH_MEM_CONFIG, tmp);
3757 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3758 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3759 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3760 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3761 WREG32(mmSH_MEM_CONFIG, tmp);
3764 WREG32(mmSH_MEM_APE1_BASE, 1);
3765 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3766 WREG32(mmSH_MEM_BASES, 0);
3768 vi_srbm_select(adev, 0, 0, 0, 0);
3769 mutex_unlock(&adev->srbm_mutex);
3771 gfx_v8_0_init_compute_vmid(adev);
3773 mutex_lock(&adev->grbm_idx_mutex);
3775 * making sure that the following register writes will be broadcasted
3776 * to all the shaders
3778 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3780 WREG32(mmPA_SC_FIFO_SIZE,
3781 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3782 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3783 (adev->gfx.config.sc_prim_fifo_size_backend <<
3784 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3785 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3786 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3787 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3788 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3789 mutex_unlock(&adev->grbm_idx_mutex);
/*
 * gfx_v8_0_wait_for_rlc_serdes - poll until the RLC serdes masters report
 * idle: first the per-CU master on every SE/SH, then the non-CU masters
 * (SE/GC/TC0/TC1), each bounded by adev->usec_timeout iterations.
 *
 * NOTE(review): elided listing — the loop-exit lines (break/delay) and
 * closing braces of the polling loops are outside the visible lines.
 */
3793 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3798 mutex_lock(&adev->grbm_idx_mutex);
3799 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3800 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3801 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3802 for (k = 0; k < adev->usec_timeout; k++) {
3803 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3809 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3810 mutex_unlock(&adev->grbm_idx_mutex);
/* now wait for the non-CU masters with GRBM back in broadcast mode */
3812 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3813 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3814 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3815 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3816 for (k = 0; k < adev->usec_timeout; k++) {
3817 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3823 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3826 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3828 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3829 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3830 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3831 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3833 WREG32(mmCP_INT_CNTL_RING0, tmp);
3836 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3839 WREG32(mmRLC_CSIB_ADDR_HI,
3840 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3841 WREG32(mmRLC_CSIB_ADDR_LO,
3842 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3843 WREG32(mmRLC_CSIB_LENGTH,
3844 adev->gfx.rlc.clear_state_size);
/*
 * gfx_v8_0_parse_ind_reg_list - scan the RLC indirect register-list
 * format blob, recording where each indirect section starts
 * (ind_start_offsets) and deduplicating register indices into
 * unique_indices; each blob entry is rewritten in place to the index of
 * its match in unique_indices.
 *
 * NOTE(review): elided listing — some parameters (list_size, counters,
 * max bounds) and the loop that searches for a match are partially
 * outside the visible lines.
 */
3847 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3850 int *unique_indices,
3853 int *ind_start_offsets,
3858 bool new_entry = true;
3860 for (; ind_offset < list_size; ind_offset++) {
/* record the start offset of a new indirect section */
3864 ind_start_offsets[*offset_count] = ind_offset;
3865 *offset_count = *offset_count + 1;
3866 BUG_ON(*offset_count >= max_offset);
/* 0xFFFFFFFF is the section terminator */
3869 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3876 /* look for the matching indice */
3878 indices < *indices_count;
3880 if (unique_indices[indices] ==
3881 register_list_format[ind_offset])
/* not seen before: append to the unique list */
3885 if (indices >= *indices_count) {
3886 unique_indices[*indices_count] =
3887 register_list_format[ind_offset];
3888 indices = *indices_count;
3889 *indices_count = *indices_count + 1;
3890 BUG_ON(*indices_count >= max_indices);
/* replace the raw value with its index into unique_indices */
3893 register_list_format[ind_offset] = indices;
/*
 * gfx_v8_0_init_save_restore_list - upload the RLC save/restore register
 * lists: the restore values into ARAM, the (deduplicated) format list and
 * starting offsets into GPM scratch, and the unique indirect register
 * indices into the SRM index-control registers.
 *
 * Returns 0 on success, negative errno on allocation failure (the error
 * return lines are elided from this listing).
 */
3897 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3900 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3901 int indices_count = 0;
3902 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3903 int offset_count = 0;
/* work on a private copy — parsing rewrites entries in place */
3906 unsigned int *register_list_format =
3907 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3908 if (register_list_format == NULL)
3910 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3911 adev->gfx.rlc.reg_list_format_size_bytes);
3913 gfx_v8_0_parse_ind_reg_list(register_list_format,
3914 RLC_FormatDirectRegListLength,
3915 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3918 sizeof(unique_indices) / sizeof(int),
3919 indirect_start_offsets,
3921 sizeof(indirect_start_offsets)/sizeof(int));
3923 /* save and restore list */
3924 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3926 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3927 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3928 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
/* stream the format list into GPM scratch (auto-increment address) */
3931 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3932 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3933 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
/* restore-list size is stored in units of register pairs (hence >> 1) */
3935 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3936 list_size = list_size >> 1;
3937 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3938 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3940 /* starting offsets starts */
3941 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3942 adev->gfx.rlc.starting_offsets_start);
3943 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3944 WREG32(mmRLC_GPM_SCRATCH_DATA,
3945 indirect_start_offsets[i]);
3947 /* unique indices */
3948 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3949 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
/* low 18 bits -> ADDR register, bits 20+ -> DATA register */
3950 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3951 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3952 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3954 kfree(register_list_format);
3959 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3961 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
/*
 * gfx_v8_0_init_power_gating - program the RLC power-gating delay and
 * threshold registers, only when any GFX PG mode (PG/SMG/DMG) is enabled
 * in adev->pg_flags.
 *
 * NOTE(review): elided listing — the `u32 data;` declaration and the end
 * of the function are outside the visible lines.
 */
3964 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3968 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3969 AMD_PG_SUPPORT_GFX_SMG |
3970 AMD_PG_SUPPORT_GFX_DMG)) {
3971 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
/* uniform 0x10 delay for power up/down, command propagate, mem sleep */
3973 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3974 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3975 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3976 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3977 WREG32(mmRLC_PG_DELAY, data);
3979 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3980 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
3984 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3987 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
3990 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3993 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
3996 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3998 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0);
/*
 * gfx_v8_0_init_pg - top-level power-gating init: set up the clear-state
 * buffer and save/restore machine when any PG feature is enabled, then
 * apply the APU (Carrizo/Stoney) or Polaris11 specific PG programming.
 *
 * NOTE(review): elided listing — one or more PG flag lines in the first
 * condition and some `else` lines are outside the visible lines.
 */
4001 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4003 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4004 AMD_PG_SUPPORT_GFX_SMG |
4005 AMD_PG_SUPPORT_GFX_DMG |
4007 AMD_PG_SUPPORT_GDS |
4008 AMD_PG_SUPPORT_RLC_SMU_HS)) {
4009 gfx_v8_0_init_csb(adev);
4010 gfx_v8_0_init_save_restore_list(adev);
4011 gfx_v8_0_enable_save_restore_machine(adev);
/* APUs: jump table, PG delays, always-on CU mask, then the
 * SMU-handshake and CP power-gating toggles */
4013 if ((adev->asic_type == CHIP_CARRIZO) ||
4014 (adev->asic_type == CHIP_STONEY)) {
4015 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4016 gfx_v8_0_init_power_gating(adev);
4017 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4018 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4019 cz_enable_sck_slow_down_on_power_up(adev, true);
4020 cz_enable_sck_slow_down_on_power_down(adev, true);
4022 cz_enable_sck_slow_down_on_power_up(adev, false);
4023 cz_enable_sck_slow_down_on_power_down(adev, false);
4025 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4026 cz_enable_cp_power_gating(adev, true);
4028 cz_enable_cp_power_gating(adev, false);
4029 } else if (adev->asic_type == CHIP_POLARIS11) {
4030 gfx_v8_0_init_power_gating(adev);
4035 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4037 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4039 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4040 gfx_v8_0_wait_for_rlc_serdes(adev);
/*
 * gfx_v8_0_rlc_reset - pulse the RLC soft-reset bit (assert, then
 * deassert).  NOTE(review): the elided lines between the two writes
 * presumably hold a settle delay — confirm against the full source.
 */
4043 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4045 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4048 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
/*
 * gfx_v8_0_rlc_start - enable the RLC F32 core; on dGPUs also re-enable
 * the GUI-idle interrupts (APUs do this later, after CP init).
 */
4052 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4054 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4056 /* carrizo do enable cp interrupt after cp inited */
4057 if (!(adev->flags & AMD_IS_APU))
4058 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4063 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4065 const struct rlc_firmware_header_v2_0 *hdr;
4066 const __le32 *fw_data;
4067 unsigned i, fw_size;
4069 if (!adev->gfx.rlc_fw)
4072 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4073 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4075 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4076 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4077 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4079 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4080 for (i = 0; i < fw_size; i++)
4081 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4082 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
/*
 * gfx_v8_0_rlc_resume - full RLC bring-up: stop, disable CGCG/CGLS,
 * clear PG control, reset, (re)program power gating, load firmware
 * (legacy path or verify SMU-loaded fw), then start the RLC.
 *
 * NOTE(review): elided listing — declarations (r, tmp), error `return r;`
 * paths and part of the 3D-CGCG handling are outside the visible lines.
 */
4087 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4092 gfx_v8_0_rlc_stop(adev)
4095 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4096 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4097 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4098 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
/* Polaris also clears the 3D-engine CGCG/CGLS enables */
4099 if (adev->asic_type == CHIP_POLARIS11 ||
4100 adev->asic_type == CHIP_POLARIS10) {
4101 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4103 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4107 WREG32(mmRLC_PG_CNTL, 0);
4109 gfx_v8_0_rlc_reset(adev);
4110 gfx_v8_0_init_pg(adev);
/* without powerplay, either load RLC fw directly or wait for SMU */
4112 if (!adev->pp_enabled) {
4113 if (!adev->firmware.smu_load) {
4114 /* legacy rlc firmware loading */
4115 r = gfx_v8_0_rlc_load_microcode(adev);
4119 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4120 AMDGPU_UCODE_ID_RLC_G);
4126 gfx_v8_0_rlc_start(adev);
4131 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4134 u32 tmp = RREG32(mmCP_ME_CNTL);
4137 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4138 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4139 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4141 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4142 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4143 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4144 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4145 adev->gfx.gfx_ring[i].ready = false;
4147 WREG32(mmCP_ME_CNTL, tmp);
4151 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4153 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4154 const struct gfx_firmware_header_v1_0 *ce_hdr;
4155 const struct gfx_firmware_header_v1_0 *me_hdr;
4156 const __le32 *fw_data;
4157 unsigned i, fw_size;
4159 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4162 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4163 adev->gfx.pfp_fw->data;
4164 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4165 adev->gfx.ce_fw->data;
4166 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4167 adev->gfx.me_fw->data;
4169 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4170 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4171 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4173 gfx_v8_0_cp_gfx_enable(adev, false);
4176 fw_data = (const __le32 *)
4177 (adev->gfx.pfp_fw->data +
4178 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4179 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4180 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4181 for (i = 0; i < fw_size; i++)
4182 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4183 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4186 fw_data = (const __le32 *)
4187 (adev->gfx.ce_fw->data +
4188 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4189 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4190 WREG32(mmCP_CE_UCODE_ADDR, 0);
4191 for (i = 0; i < fw_size; i++)
4192 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4193 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4196 fw_data = (const __le32 *)
4197 (adev->gfx.me_fw->data +
4198 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4199 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4200 WREG32(mmCP_ME_RAM_WADDR, 0);
4201 for (i = 0; i < fw_size; i++)
4202 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4203 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
/*
 * gfx_v8_0_get_csb_size - compute the dword count of the clear-state
 * command stream built by gfx_v8_0_cp_gfx_start: preamble + context
 * control + SECT_CONTEXT extents + raster config + end-of-clear-state.
 *
 * NOTE(review): elided listing — the `count` accumulation statements for
 * the fixed-size packets are outside the visible lines; only the
 * per-extent `count += 2 + reg_count` is visible.
 */
4208 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4211 const struct cs_section_def *sect = NULL;
4212 const struct cs_extent_def *ext = NULL;
4214 /* begin clear state */
4216 /* context control state */
4219 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4220 for (ext = sect->section; ext->extent != NULL; ++ext) {
4221 if (sect->id == SECT_CONTEXT)
/* 2 header dwords per SET_CONTEXT_REG packet plus its payload */
4222 count += 2 + ext->reg_count;
4227 /* pa_sc_raster_config/pa_sc_raster_config1 */
4229 /* end clear state */
/*
 * gfx_v8_0_cp_gfx_start - enable the GFX CP and submit the initial
 * clear-state stream on ring 0: preamble, context control, the
 * SECT_CONTEXT register extents, per-ASIC PA_SC_RASTER_CONFIG values,
 * CLEAR_STATE, and the CE partition bases.
 *
 * NOTE(review): elided listing — case labels/breaks in the ASIC switch
 * and the error/return tail are outside the visible lines.
 */
4239 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4240 const struct cs_section_def *sect = NULL;
4241 const struct cs_extent_def *ext = NULL;
/* global CP setup before the first submission */
4245 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4246 WREG32(mmCP_ENDIAN_SWAP, 0);
4247 WREG32(mmCP_DEVICE_ID, 1);
4249 gfx_v8_0_cp_gfx_enable(adev, true);
/* reserve space: computed clear-state size plus trailing packets */
4251 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4253 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4257 /* clear state buffer */
4258 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4259 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4261 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4262 amdgpu_ring_write(ring, 0x80000000);
4263 amdgpu_ring_write(ring, 0x80000000);
/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
4265 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4266 for (ext = sect->section; ext->extent != NULL; ++ext) {
4267 if (sect->id == SECT_CONTEXT) {
4268 amdgpu_ring_write(ring,
4269 PACKET3(PACKET3_SET_CONTEXT_REG,
4271 amdgpu_ring_write(ring,
4272 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4273 for (i = 0; i < ext->reg_count; i++)
4274 amdgpu_ring_write(ring, ext->extent[i]);
/* per-ASIC PA_SC_RASTER_CONFIG / _CONFIG_1 pair */
4279 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4280 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4281 switch (adev->asic_type) {
4283 case CHIP_POLARIS10:
4284 amdgpu_ring_write(ring, 0x16000012);
4285 amdgpu_ring_write(ring, 0x0000002A);
4287 case CHIP_POLARIS11:
4288 amdgpu_ring_write(ring, 0x16000012);
4289 amdgpu_ring_write(ring, 0x00000000);
4292 amdgpu_ring_write(ring, 0x3a00161a);
4293 amdgpu_ring_write(ring, 0x0000002e);
4296 amdgpu_ring_write(ring, 0x00000002);
4297 amdgpu_ring_write(ring, 0x00000000);
4300 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4301 0x00000000 : 0x00000002);
4302 amdgpu_ring_write(ring, 0x00000000);
4305 amdgpu_ring_write(ring, 0x00000000);
4306 amdgpu_ring_write(ring, 0x00000000);
4312 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4313 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4315 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4316 amdgpu_ring_write(ring, 0);
4318 /* init the CE partitions */
4319 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4320 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4321 amdgpu_ring_write(ring, 0x8000);
4322 amdgpu_ring_write(ring, 0x8000);
4324 amdgpu_ring_commit(ring);
/*
 * gfx_v8_0_cp_gfx_resume - program the GFX ring-buffer registers
 * (size, pointers, writeback addresses, base, doorbell), kick off the
 * initial clear-state submission, and ring-test ring 0.
 *
 * NOTE(review): elided listing — some declarations, the big-endian
 * conditional around BUF_SWAP, and the final return are outside the
 * visible lines.
 */
4331 struct amdgpu_ring *ring;
4334 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4337 /* Set the write pointer delay */
4338 WREG32(mmCP_RB_WPTR_DELAY, 0);
4340 /* set the RB to use vmid 0 */
4341 WREG32(mmCP_RB_VMID, 0);
4343 /* Set ring buffer size */
4344 ring = &adev->gfx.gfx_ring[0];
/* RB_BUFSZ is log2 of the size in 8-dword units */
4345 rb_bufsz = order_base_2(ring->ring_size / 8);
4346 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4347 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4348 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4349 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4351 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4353 WREG32(mmCP_RB0_CNTL, tmp);
4355 /* Initialize the ring buffer's read and write pointers */
4356 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4358 WREG32(mmCP_RB0_WPTR, ring->wptr);
4360 /* set the wb address wether it's enabled or not */
4361 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4362 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4363 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4365 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4366 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4367 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
/* re-write CNTL with RB_RPTR_WR_ENA cleared to latch the pointers */
4369 WREG32(mmCP_RB0_CNTL, tmp);
4371 rb_addr = ring->gpu_addr >> 8;
4372 WREG32(mmCP_RB0_BASE, rb_addr);
4373 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4375 /* no gfx doorbells on iceland */
4376 if (adev->asic_type != CHIP_TOPAZ) {
4377 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4378 if (ring->use_doorbell) {
4379 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4380 DOORBELL_OFFSET, ring->doorbell_index);
4381 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4383 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4386 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4389 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
/* Tonga additionally needs the doorbell range programmed */
4391 if (adev->asic_type == CHIP_TONGA) {
4392 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4393 DOORBELL_RANGE_LOWER,
4394 AMDGPU_DOORBELL_GFX_RING0);
4395 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4397 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4398 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4403 /* start the ring */
4404 gfx_v8_0_cp_gfx_start(adev);
4406 r = amdgpu_ring_test_ring(ring);
4408 ring->ready = false;
/* Enable or halt the compute micro engines (MEC1/MEC2).
 * Disabling halts both MEs via CP_MEC_CNTL and marks every compute ring
 * not ready so no further submissions reach the hardware.
 */
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
	/* clearing the halt bits lets the MECs run */
	WREG32(mmCP_MEC_CNTL, 0);
	/* halt both compute micro engines */
	WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].ready = false;
/* Load the compute (MEC) microcode into the CP with MMIO writes.
 * Halts the compute MEs first, then streams the MEC1 image (and a
 * separate MEC2 image, when one is provided) through the per-ME
 * UCODE_ADDR/UCODE_DATA window.  Returns 0 on success, negative errno
 * otherwise.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)

	/* the MECs must be halted while their microcode is replaced */
	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	/* header size is in bytes; the DATA port takes dwords */
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1: reset the autoincrementing write pointer, stream the image */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	/* writing the fw version back marks the upload complete */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	/* Memory Queue Descriptor (MQD) layout for VI (gfx v8) compute queues.
	 * One 32-bit word per hardware "ordinal"; the CP microcode reads this
	 * structure directly from GPU-visible memory, so the field order and
	 * packing must match the ordinals exactly — never reorder or insert
	 * fields here.
	 */
	uint32_t header; /* ordinal0 */
	uint32_t compute_dispatch_initiator; /* ordinal1 */
	uint32_t compute_dim_x; /* ordinal2 */
	uint32_t compute_dim_y; /* ordinal3 */
	uint32_t compute_dim_z; /* ordinal4 */
	uint32_t compute_start_x; /* ordinal5 */
	uint32_t compute_start_y; /* ordinal6 */
	uint32_t compute_start_z; /* ordinal7 */
	uint32_t compute_num_thread_x; /* ordinal8 */
	uint32_t compute_num_thread_y; /* ordinal9 */
	uint32_t compute_num_thread_z; /* ordinal10 */
	uint32_t compute_pipelinestat_enable; /* ordinal11 */
	uint32_t compute_perfcount_enable; /* ordinal12 */
	uint32_t compute_pgm_lo; /* ordinal13 */
	uint32_t compute_pgm_hi; /* ordinal14 */
	uint32_t compute_tba_lo; /* ordinal15 */
	uint32_t compute_tba_hi; /* ordinal16 */
	uint32_t compute_tma_lo; /* ordinal17 */
	uint32_t compute_tma_hi; /* ordinal18 */
	uint32_t compute_pgm_rsrc1; /* ordinal19 */
	uint32_t compute_pgm_rsrc2; /* ordinal20 */
	uint32_t compute_vmid; /* ordinal21 */
	uint32_t compute_resource_limits; /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */
	uint32_t compute_tmpring_size; /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */
	uint32_t compute_restart_x; /* ordinal28 */
	uint32_t compute_restart_y; /* ordinal29 */
	uint32_t compute_restart_z; /* ordinal30 */
	uint32_t compute_thread_trace_enable; /* ordinal31 */
	uint32_t compute_misc_reserved; /* ordinal32 */
	uint32_t compute_dispatch_id; /* ordinal33 */
	uint32_t compute_threadgroup_id; /* ordinal34 */
	uint32_t compute_relaunch; /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo; /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi; /* ordinal37 */
	uint32_t compute_wave_restore_control; /* ordinal38 */
	uint32_t reserved9; /* ordinal39 */
	uint32_t reserved10; /* ordinal40 */
	uint32_t reserved11; /* ordinal41 */
	uint32_t reserved12; /* ordinal42 */
	uint32_t reserved13; /* ordinal43 */
	uint32_t reserved14; /* ordinal44 */
	uint32_t reserved15; /* ordinal45 */
	uint32_t reserved16; /* ordinal46 */
	uint32_t reserved17; /* ordinal47 */
	uint32_t reserved18; /* ordinal48 */
	uint32_t reserved19; /* ordinal49 */
	uint32_t reserved20; /* ordinal50 */
	uint32_t reserved21; /* ordinal51 */
	uint32_t reserved22; /* ordinal52 */
	uint32_t reserved23; /* ordinal53 */
	uint32_t reserved24; /* ordinal54 */
	uint32_t reserved25; /* ordinal55 */
	uint32_t reserved26; /* ordinal56 */
	uint32_t reserved27; /* ordinal57 */
	uint32_t reserved28; /* ordinal58 */
	uint32_t reserved29; /* ordinal59 */
	uint32_t reserved30; /* ordinal60 */
	uint32_t reserved31; /* ordinal61 */
	uint32_t reserved32; /* ordinal62 */
	uint32_t reserved33; /* ordinal63 */
	uint32_t reserved34; /* ordinal64 */
	uint32_t compute_user_data_0; /* ordinal65 */
	uint32_t compute_user_data_1; /* ordinal66 */
	uint32_t compute_user_data_2; /* ordinal67 */
	uint32_t compute_user_data_3; /* ordinal68 */
	uint32_t compute_user_data_4; /* ordinal69 */
	uint32_t compute_user_data_5; /* ordinal70 */
	uint32_t compute_user_data_6; /* ordinal71 */
	uint32_t compute_user_data_7; /* ordinal72 */
	uint32_t compute_user_data_8; /* ordinal73 */
	uint32_t compute_user_data_9; /* ordinal74 */
	uint32_t compute_user_data_10; /* ordinal75 */
	uint32_t compute_user_data_11; /* ordinal76 */
	uint32_t compute_user_data_12; /* ordinal77 */
	uint32_t compute_user_data_13; /* ordinal78 */
	uint32_t compute_user_data_14; /* ordinal79 */
	uint32_t compute_user_data_15; /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */
	uint32_t reserved35; /* ordinal83 */
	uint32_t reserved36; /* ordinal84 */
	uint32_t reserved37; /* ordinal85 */
	uint32_t cp_mqd_query_time_lo; /* ordinal86 */
	uint32_t cp_mqd_query_time_hi; /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */
	uint32_t reserved38; /* ordinal96 */
	uint32_t reserved39; /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */
	uint32_t reserved40; /* ordinal106 */
	uint32_t reserved41; /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */
	uint32_t reserved42; /* ordinal112 */
	uint32_t reserved43; /* ordinal113 */
	uint32_t cp_pq_exe_status_lo; /* ordinal114 */
	uint32_t cp_pq_exe_status_hi; /* ordinal115 */
	uint32_t cp_packet_id_lo; /* ordinal116 */
	uint32_t cp_packet_id_hi; /* ordinal117 */
	uint32_t cp_packet_exe_status_lo; /* ordinal118 */
	uint32_t cp_packet_exe_status_hi; /* ordinal119 */
	uint32_t gds_save_base_addr_lo; /* ordinal120 */
	uint32_t gds_save_base_addr_hi; /* ordinal121 */
	uint32_t gds_save_mask_lo; /* ordinal122 */
	uint32_t gds_save_mask_hi; /* ordinal123 */
	uint32_t ctx_save_base_addr_lo; /* ordinal124 */
	uint32_t ctx_save_base_addr_hi; /* ordinal125 */
	uint32_t reserved44; /* ordinal126 */
	uint32_t reserved45; /* ordinal127 */
	/* from here on the fields mirror the CP_HQD_* / CP_MQD_* registers */
	uint32_t cp_mqd_base_addr_lo; /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi; /* ordinal129 */
	uint32_t cp_hqd_active; /* ordinal130 */
	uint32_t cp_hqd_vmid; /* ordinal131 */
	uint32_t cp_hqd_persistent_state; /* ordinal132 */
	uint32_t cp_hqd_pipe_priority; /* ordinal133 */
	uint32_t cp_hqd_queue_priority; /* ordinal134 */
	uint32_t cp_hqd_quantum; /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo; /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi; /* ordinal137 */
	uint32_t cp_hqd_pq_rptr; /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */
	uint32_t cp_hqd_pq_wptr; /* ordinal144 */
	uint32_t cp_hqd_pq_control; /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */
	uint32_t cp_hqd_ib_rptr; /* ordinal148 */
	uint32_t cp_hqd_ib_control; /* ordinal149 */
	uint32_t cp_hqd_iq_timer; /* ordinal150 */
	uint32_t cp_hqd_iq_rptr; /* ordinal151 */
	uint32_t cp_hqd_dequeue_request; /* ordinal152 */
	uint32_t cp_hqd_dma_offload; /* ordinal153 */
	uint32_t cp_hqd_sema_cmd; /* ordinal154 */
	uint32_t cp_hqd_msg_type; /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */
	uint32_t cp_hqd_hq_status0; /* ordinal160 */
	uint32_t cp_hqd_hq_control0; /* ordinal161 */
	uint32_t cp_mqd_control; /* ordinal162 */
	uint32_t cp_hqd_hq_status1; /* ordinal163 */
	uint32_t cp_hqd_hq_control1; /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */
	uint32_t cp_hqd_eop_control; /* ordinal167 */
	uint32_t cp_hqd_eop_rptr; /* ordinal168 */
	uint32_t cp_hqd_eop_wptr; /* ordinal169 */
	uint32_t cp_hqd_eop_done_events; /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control; /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset; /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size; /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state; /* ordinal178 */
	uint32_t cp_hqd_error; /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */
	uint32_t cp_hqd_eop_dones; /* ordinal181 */
	uint32_t reserved46; /* ordinal182 */
	uint32_t reserved47; /* ordinal183 */
	uint32_t reserved48; /* ordinal184 */
	uint32_t reserved49; /* ordinal185 */
	uint32_t reserved50; /* ordinal186 */
	uint32_t reserved51; /* ordinal187 */
	uint32_t reserved52; /* ordinal188 */
	uint32_t reserved53; /* ordinal189 */
	uint32_t reserved54; /* ordinal190 */
	uint32_t reserved55; /* ordinal191 */
	uint32_t iqtimer_pkt_header; /* ordinal192 */
	uint32_t iqtimer_pkt_dw0; /* ordinal193 */
	uint32_t iqtimer_pkt_dw1; /* ordinal194 */
	uint32_t iqtimer_pkt_dw2; /* ordinal195 */
	uint32_t iqtimer_pkt_dw3; /* ordinal196 */
	uint32_t iqtimer_pkt_dw4; /* ordinal197 */
	uint32_t iqtimer_pkt_dw5; /* ordinal198 */
	uint32_t iqtimer_pkt_dw6; /* ordinal199 */
	uint32_t iqtimer_pkt_dw7; /* ordinal200 */
	uint32_t iqtimer_pkt_dw8; /* ordinal201 */
	uint32_t iqtimer_pkt_dw9; /* ordinal202 */
	uint32_t iqtimer_pkt_dw10; /* ordinal203 */
	uint32_t iqtimer_pkt_dw11; /* ordinal204 */
	uint32_t iqtimer_pkt_dw12; /* ordinal205 */
	uint32_t iqtimer_pkt_dw13; /* ordinal206 */
	uint32_t iqtimer_pkt_dw14; /* ordinal207 */
	uint32_t iqtimer_pkt_dw15; /* ordinal208 */
	uint32_t iqtimer_pkt_dw16; /* ordinal209 */
	uint32_t iqtimer_pkt_dw17; /* ordinal210 */
	uint32_t iqtimer_pkt_dw18; /* ordinal211 */
	uint32_t iqtimer_pkt_dw19; /* ordinal212 */
	uint32_t iqtimer_pkt_dw20; /* ordinal213 */
	uint32_t iqtimer_pkt_dw21; /* ordinal214 */
	uint32_t iqtimer_pkt_dw22; /* ordinal215 */
	uint32_t iqtimer_pkt_dw23; /* ordinal216 */
	uint32_t iqtimer_pkt_dw24; /* ordinal217 */
	uint32_t iqtimer_pkt_dw25; /* ordinal218 */
	uint32_t iqtimer_pkt_dw26; /* ordinal219 */
	uint32_t iqtimer_pkt_dw27; /* ordinal220 */
	uint32_t iqtimer_pkt_dw28; /* ordinal221 */
	uint32_t iqtimer_pkt_dw29; /* ordinal222 */
	uint32_t iqtimer_pkt_dw30; /* ordinal223 */
	uint32_t iqtimer_pkt_dw31; /* ordinal224 */
	uint32_t reserved56; /* ordinal225 */
	uint32_t reserved57; /* ordinal226 */
	uint32_t reserved58; /* ordinal227 */
	uint32_t set_resources_header; /* ordinal228 */
	uint32_t set_resources_dw1; /* ordinal229 */
	uint32_t set_resources_dw2; /* ordinal230 */
	uint32_t set_resources_dw3; /* ordinal231 */
	uint32_t set_resources_dw4; /* ordinal232 */
	uint32_t set_resources_dw5; /* ordinal233 */
	uint32_t set_resources_dw6; /* ordinal234 */
	uint32_t set_resources_dw7; /* ordinal235 */
	uint32_t reserved59; /* ordinal236 */
	uint32_t reserved60; /* ordinal237 */
	uint32_t reserved61; /* ordinal238 */
	uint32_t reserved62; /* ordinal239 */
	uint32_t reserved63; /* ordinal240 */
	uint32_t reserved64; /* ordinal241 */
	uint32_t reserved65; /* ordinal242 */
	uint32_t reserved66; /* ordinal243 */
	uint32_t reserved67; /* ordinal244 */
	uint32_t reserved68; /* ordinal245 */
	uint32_t reserved69; /* ordinal246 */
	uint32_t reserved70; /* ordinal247 */
	uint32_t reserved71; /* ordinal248 */
	uint32_t reserved72; /* ordinal249 */
	uint32_t reserved73; /* ordinal250 */
	uint32_t reserved74; /* ordinal251 */
	uint32_t reserved75; /* ordinal252 */
	uint32_t reserved76; /* ordinal253 */
	uint32_t reserved77; /* ordinal254 */
	uint32_t reserved78; /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
/* Tear down the per-ring MQD buffer objects created by
 * gfx_v8_0_cp_compute_resume(): unpin, unreserve, drop the reference and
 * clear the pointer for every compute ring.  Safe to call when an MQD was
 * never allocated (mqd_obj is NULL).
 */
static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj) {
			r = amdgpu_bo_reserve(ring->mqd_obj, false);
			if (unlikely(r != 0))
				/* warn but keep going: unref below still releases the BO */
				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);

			amdgpu_bo_unpin(ring->mqd_obj);
			amdgpu_bo_unreserve(ring->mqd_obj);

			amdgpu_bo_unref(&ring->mqd_obj);
			ring->mqd_obj = NULL;
/* Bring up the compute queues: program the per-pipe EOP buffers, then for
 * every compute ring allocate/map an MQD buffer in GTT, fill it in, mirror
 * its contents into the CP_HQD_* registers (under srbm_mutex, with the
 * ring's me/pipe/queue selected), activate the queue, and finally run a
 * ring test on each queue.  Returns 0 on success, negative errno otherwise.
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
	bool use_doorbell = true;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0..3 live on MEC1, 4..7 on MEC2 */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* MQD BOs are created lazily and kept across suspend/resume */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all CUs on every shader engine */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* EOP base was already programmed per-pipe above; read it back */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* wait for the HQD to drain before reprogramming it */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

			mqd->cp_hqd_pq_doorbell_control = 0;

		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);

		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);

	/* un-halt the MECs now that the queues are programmed */
	gfx_v8_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		r = amdgpu_ring_test_ring(ring);
			ring->ready = false;
/* Resume the whole command processor: load the CP microcode (either the
 * legacy direct-MMIO path or by waiting on the SMU loader, depending on
 * adev->pp_enabled / firmware.smu_load), then bring up the gfx and compute
 * rings.  Returns 0 on success, negative errno otherwise.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			/* SMU loads the fw; just wait for each image to land */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);

			/* Topaz's MEC still goes through the legacy loader */
			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
								AMDGPU_UCODE_ID_CP_MEC1);

	r = gfx_v8_0_cp_gfx_resume(adev);

	r = gfx_v8_0_cp_compute_resume(adev);

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/* Enable/disable both halves of the command processor (gfx ME and the
 * compute MECs) together.
 */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
/* IP-block hw_init hook: program golden registers, run the one-time GPU
 * init, then start the RLC and the command processor.  Returns 0 on
 * success, negative errno otherwise.
 */
static int gfx_v8_0_hw_init(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* RLC must be running before the CP is resumed */
	r = gfx_v8_0_rlc_resume(adev);

	r = gfx_v8_0_cp_resume(adev);
/* IP-block hw_fini hook: drop the privileged-reg/instruction interrupt
 * references, stop the CP and RLC, release the compute MQDs, and ungate
 * GFX powergating so the block is in a known state for the next init.
 */
static int gfx_v8_0_hw_fini(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
/* Suspend is just a full hw_fini for this block. */
static int gfx_v8_0_suspend(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_fini(adev);
/* Resume is just a full hw_init for this block. */
static int gfx_v8_0_resume(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_init(adev);
/* Report whether the GFX block is idle, based solely on the
 * GRBM_STATUS.GUI_ACTIVE bit.
 */
static bool gfx_v8_0_is_idle(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
/* Busy-poll gfx_v8_0_is_idle() for up to adev->usec_timeout iterations.
 * Returns 0 once idle; times out otherwise.
 */
static int gfx_v8_0_wait_for_idle(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_is_idle(handle))
/* Inspect GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS for stuck engines and build
 * the GRBM/SRBM soft-reset masks accordingly.  The masks are stashed in
 * adev->gfx.{grbm,srbm}_soft_reset for the pre/soft/post reset hooks;
 * returns whether any reset is needed.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	/* GRBM_STATUS: any busy pipeline stage means CP+GFX need a reset */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	/* GRBM_STATUS2: RLC and per-CP-block (fetcher/compute/gfx) busy bits */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);

	/* SRBM_STATUS: pending GRBM requests or a busy semaphore block */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
/* Request dequeue on one compute ring's hardware queue and poll (up to
 * adev->usec_timeout) for CP_HQD_ACTIVE to clear.  Caller is expected to
 * hold srbm_mutex; this selects the ring's me/pipe/queue via SRBM.
 */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
/* Quiesce the engines named by the masks recorded in check_soft_reset
 * before the actual soft reset: stop the RLC, halt GFX parsing when the
 * CP/GFX bits are set, and drain+halt every compute HQD when any compute
 * CP bit is set.  No-op when neither mask is set.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		/* drain every compute queue before halting the MECs */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_inactive_hqd(adev, ring);
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
/* Perform the actual soft reset using the masks recorded by
 * check_soft_reset: stall/clear GFX via GMCON_DEBUG, pulse the GRBM and
 * SRBM soft-reset registers (set then clear, reading back to post the
 * writes), then release the GMCON stall.  No-op when neither mask is set.
 */
static int gfx_v8_0_soft_reset(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall the memory controller's view of GFX while resetting */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		/* read back to make sure the write landed before the delay */
		tmp = RREG32(mmGRBM_SOFT_RESET);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

	/* release the GMCON stall/clear set above */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);

	/* Wait a little for things to settle down */
/* Clear one compute ring's HQD state (dequeue request and PQ read/write
 * pointers) after a soft reset, selecting the queue via SRBM and restoring
 * the default selection afterwards.
 */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
/* Bring the engines back after a soft reset, mirroring pre_soft_reset:
 * restart the gfx CP when the CP/GFX bits were set, re-init every compute
 * HQD and resume the compute CP when any compute CP bit was set, then
 * restart the RLC.  No-op when neither recorded mask is set.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_init_hqd(adev, ring);
		gfx_v8_0_cp_compute_resume(adev);
	gfx_v8_0_rlc_start(adev);
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	/* serialize captures: the LSB/MSB register pair must be read from
	 * the same latched sample, so no other capture may interleave */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
/* Emit WRITE_DATA packets that program the per-VMID GDS/GWS/OA
 * partitioning registers from inside the command stream.  Byte-based
 * base/size arguments are converted to the hardware's granularity by
 * the AMDGPU_*_SHIFT right-shifts before being written.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size in the upper field, base in the lower */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: build a contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
/* GFX IP callbacks exported to the rest of the driver. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
};
/* IP-block early_init hook: set the ring counts and install the
 * function tables (ring/irq/gds/rlc) before any hardware is touched.
 * Always succeeds.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
/* IP-block late_init hook: enable the privileged-access fault interrupts,
 * run the EDC GPR workarounds (needs the IB pool, hence late init), and
 * finally gate GFX power.  Returns 0 on success or the first error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
/* Toggle static per-CU medium-grain power gating.  Polaris11 routes the
 * request through the SMU (via powerplay) instead of programming the RLC
 * register directly.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if (adev->asic_type == CHIP_POLARIS11)
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
	else
		WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5501 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5504 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5507 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5510 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5513 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5516 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5519 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5522 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5524 /* Read any GFX register to wake up GFX. */
5526 RREG32(mmDB_RENDER_CONTROL);
5529 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5532 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5533 cz_enable_gfx_cg_power_gating(adev, true);
5534 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5535 cz_enable_gfx_pipeline_power_gating(adev, true);
5537 cz_enable_gfx_cg_power_gating(adev, false);
5538 cz_enable_gfx_pipeline_power_gating(adev, false);
5542 static int gfx_v8_0_set_powergating_state(void *handle,
5543 enum amd_powergating_state state)
5545 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5546 bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5548 if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5551 switch (adev->asic_type) {
5554 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5555 cz_update_gfx_cg_power_gating(adev, enable);
5557 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5558 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5560 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5562 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5563 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5565 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5567 case CHIP_POLARIS11:
5568 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5569 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5571 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5573 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5574 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5576 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5578 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5579 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5581 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
/* Broadcast a BPM command (cmd) targeting reg_addr to the RLC serdes on
 * all SEs/SHs/CUs: select the broadcast SE/SH, open the CU master masks,
 * then clear the command/select bits in RLC_SERDES_WR_CTRL and rewrite
 * it with the new command.  Stoney lacks the BPM_DATA/REG_ADDR fields,
 * so those are not masked out there.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* 0xffffffff == broadcast to every SE/SH/instance */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
/* RLC safe-mode handshake (Carrizo/Stoney path) goes through RLC_GPR_REG2:
 * bit 0 is the request flag, bits 4:1 carry the message code.  These field
 * definitions are not in the generated sh_mask headers, hence local. */
#define MSG_ENTER_RLC_SAFE_MODE      1
#define MSG_EXIT_RLC_SAFE_MODE       0
#define RLC_GPR_REG2__REQ_MASK           0x00000001
#define RLC_GPR_REG2__REQ__SHIFT         0
#define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
/* Carrizo/Stoney: ask the RLC firmware to enter safe mode via the
 * RLC_GPR_REG2 request/message handshake, then poll (up to usec_timeout
 * iterations) for the GFX clock/power status bits and for the firmware
 * to acknowledge by clearing REQ.  No-op if the RLC is not running or no
 * relevant CG/PG feature is enabled.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* RLC firmware not running -> nothing to hand-shake with */
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait until both GFX clock and power report "on" */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the firmware to clear the request bit */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
/* Carrizo/Stoney counterpart of cz_enter_rlc_safe_mode: send the exit
 * message through RLC_GPR_REG2 and wait for the firmware to clear REQ.
 * in_safe_mode is cleared as soon as the message is posted.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* RLC firmware not running -> nothing to hand-shake with */
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the firmware to acknowledge by clearing REQ */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
			break;
		udelay(1);
	}
}
/* Iceland-family variant of the safe-mode entry: uses the architected
 * RLC_SAFE_MODE CMD/MESSAGE register instead of RLC_GPR_REG2, then polls
 * for GFX clock/power status and for CMD to be acknowledged (cleared).
 * No-op if the RLC is not running or no CGCG/MGCG support is flagged.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait until both GFX clock and power report "on" */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the firmware to clear the CMD bit */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
/* Iceland-family safe-mode exit: posting CMD with a zero MESSAGE asks the
 * RLC to leave safe mode.  Only done when we actually entered safe mode
 * (in_safe_mode); the final poll waits for CMD to clear regardless.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the firmware to acknowledge by clearing CMD */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
/* Fallback for ASICs with no safe-mode handshake: just record the state. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
/* Fallback for ASICs with no safe-mode handshake: just record the state. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
/* RLC safe-mode ops for Carrizo/Stoney (RLC_GPR_REG2 handshake). */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};
/* RLC safe-mode ops for the Iceland family (RLC_SAFE_MODE register). */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
/* RLC safe-mode ops for ASICs that need no handshake at all. */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
/* Enable or disable medium-grain clock gating (MGCG), memory light sleep
 * (MGLS) and tree-shade gating (CGTS) under RLC safe mode.  The numbered
 * comments preserve the programming sequence the hardware requires; do
 * not reorder the steps.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override set; dGPUs clear it too */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
/* Enable or disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) under RLC safe mode.  The serdes commands must
 * bracket the RLC_CGCG_CGLS_CTRL update exactly as ordered below.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* override both CGCG and CGLS so the gating logic is bypassed */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
/* Sequence medium-grain and coarse-grain clock gating updates.  Ordering
 * matters: CGCG/CGLS must be enabled after (and disabled before) the
 * medium-grain features.  Returns 0.
 */
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * ===  MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * ===  CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}
/* Tonga routes GFX clock-gating through the SMU: build a PP_CG_MSG_ID for
 * each gated block (CG and MG here) and hand it to powerplay.  Gating
 * requests CG+LS state; ungating requests state 0.  Returns 0.
 */
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
						  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state;
	void *pp_handle = adev->powerplay.pp_handle;

	if (state == AMD_CG_STATE_UNGATE)
		pp_state = 0;
	else
		pp_state = PP_STATE_CG | PP_STATE_LS;

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			PP_BLOCK_GFX_CG,
			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			PP_BLOCK_GFX_MG,
			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);

	return 0;
}
/* Polaris SMU-based clock gating: same scheme as the Tonga variant but
 * covering five GFX sub-blocks (CG, 3D, MG, RLC, CP).  Returns 0.
 */
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
						    enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state;
	void *pp_handle = adev->powerplay.pp_handle;

	if (state == AMD_CG_STATE_UNGATE)
		pp_state = 0;
	else
		pp_state = PP_STATE_CG | PP_STATE_LS;

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			PP_BLOCK_GFX_CG,
			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			PP_BLOCK_GFX_3D,
			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			PP_BLOCK_GFX_MG,
			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			PP_BLOCK_GFX_RLC,
			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			PP_BLOCK_GFX_CP,
			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);

	return 0;
}
/* IP-block set_clockgating_state hook: dispatch per ASIC — direct
 * register programming for Fiji/Carrizo/Stoney, SMU messages for
 * Tonga/Polaris.  Returns 0.
 */
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE ? true : false);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}
6092 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6094 return ring->adev->wb.wb[ring->rptr_offs];
6097 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6099 struct amdgpu_device *adev = ring->adev;
6101 if (ring->use_doorbell)
6102 /* XXX check if swapping is necessary on BE */
6103 return ring->adev->wb.wb[ring->wptr_offs];
6105 return RREG32(mmCP_RB0_WPTR);
/* Publish the gfx ring's new write pointer: writeback memory first, then
 * the doorbell; or CP_RB0_WPTR directly, with a read-back to flush the
 * register write.
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = ring->wptr;
		WDOORBELL32(ring->doorbell_index, ring->wptr);
	} else {
		WREG32(mmCP_RB0_WPTR, ring->wptr);
		/* read back to confirm the write reached the hardware */
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
/* Emit a WAIT_REG_MEM packet that triggers an HDP flush and waits for its
 * completion bit.  The DONE bit to poll depends on which CP engine owns
 * the ring (CP0 for gfx; CP2/CP6 shifted by pipe for the two MECs).
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
/* Invalidate the HDP read cache from the command stream by writing 1 to
 * HDP_DEBUG0 with write-confirm set.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);
}
/* Emit an indirect-buffer packet on the gfx ring.  CE IBs use the
 * INDIRECT_BUFFER_CONST opcode; the control dword carries the IB length
 * and the target VMID in bits 31:24.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
/* Emit an indirect-buffer packet on a compute ring.  Unlike the gfx
 * variant there is no CE path; the VALID bit is set in the control word.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
/* Emit a fence on the gfx ring via EVENT_WRITE_EOP: flush TC/TCL1
 * caches, write the 32- or 64-bit seq value to addr and optionally raise
 * an interrupt, per flags.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
/* Emit a WAIT_REG_MEM that stalls the ring until the fence memory at the
 * ring's sync address equals the latest synced sequence number.  GFX
 * rings wait on the PFP so prefetch stalls too; compute waits on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
/* Emit a VM flush from the command stream: update the per-VMID page
 * directory base register (contexts 0-7 and 8-15 live in different
 * register banks), request a TLB invalidate for that VMID, and busy-wait
 * until the invalidate request register reads back 0.  GFX rings finish
 * with a PFP_SYNC_ME plus a 128-dw NOP pad so the CE cannot run ahead of
 * the flush.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
		amdgpu_ring_insert_nop(ring, 128);
	}
}
6292 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6294 return ring->adev->wb.wb[ring->wptr_offs];
/* Publish a compute ring's write pointer: writeback slot first, then
 * ring the doorbell to notify the CP.
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
/* Emit a fence on a compute ring via RELEASE_MEM (the MEC equivalent of
 * EVENT_WRITE_EOP): flush caches, write the seq value and optionally
 * raise an interrupt, per flags.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
/* Emit a SWITCH_BUFFER packet (used to flip the CE/DE buffer pair). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
/* Emit a CONTEXT_CONTROL packet whose load bits depend on whether this
 * submission performs a context switch and/or carries a preamble IB.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6362 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6363 enum amdgpu_interrupt_state state)
6365 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6366 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	/*
	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
	 * handles the setting of interrupts for this specific pipe. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6396 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6397 struct amdgpu_irq_src *source,
6399 enum amdgpu_interrupt_state state)
6401 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6402 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6407 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6408 struct amdgpu_irq_src *source,
6410 enum amdgpu_interrupt_state state)
6412 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6413 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/* Route an EOP interrupt enable/disable request to the right engine:
 * the gfx ring, or MEC engine 1/2 pipe 0-3.  Returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
/* EOP interrupt handler: decode me/pipe/queue from the IV ring_id and
 * run fence processing on the matching ring (the single gfx ring for
 * ME 0, or the matching compute ring for MEC 1/2).  Returns 0.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id packs the source engine: [3:2]=me, [1:0]=pipe, [6:4]=queue */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
			  */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6489 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6490 struct amdgpu_irq_src *source,
6491 struct amdgpu_iv_entry *entry)
6493 DRM_ERROR("Illegal register access in command stream\n");
6494 schedule_work(&adev->reset_work);
6498 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6499 struct amdgpu_irq_src *source,
6500 struct amdgpu_iv_entry *entry)
6502 DRM_ERROR("Illegal instruction in command stream\n");
6503 schedule_work(&adev->reset_work);
6507 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6509 .early_init = gfx_v8_0_early_init,
6510 .late_init = gfx_v8_0_late_init,
6511 .sw_init = gfx_v8_0_sw_init,
6512 .sw_fini = gfx_v8_0_sw_fini,
6513 .hw_init = gfx_v8_0_hw_init,
6514 .hw_fini = gfx_v8_0_hw_fini,
6515 .suspend = gfx_v8_0_suspend,
6516 .resume = gfx_v8_0_resume,
6517 .is_idle = gfx_v8_0_is_idle,
6518 .wait_for_idle = gfx_v8_0_wait_for_idle,
6519 .check_soft_reset = gfx_v8_0_check_soft_reset,
6520 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6521 .soft_reset = gfx_v8_0_soft_reset,
6522 .post_soft_reset = gfx_v8_0_post_soft_reset,
6523 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6524 .set_powergating_state = gfx_v8_0_set_powergating_state,
6527 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6528 .type = AMDGPU_RING_TYPE_GFX,
6530 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6531 .get_rptr = gfx_v8_0_ring_get_rptr,
6532 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6533 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6535 20 + /* gfx_v8_0_ring_emit_gds_switch */
6536 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6537 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6538 6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
6539 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6540 128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
6541 2 + /* gfx_v8_ring_emit_sb */
6542 3, /* gfx_v8_ring_emit_cntxcntl */
6543 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6544 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6545 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6546 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6547 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6548 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6549 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6550 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6551 .test_ring = gfx_v8_0_ring_test_ring,
6552 .test_ib = gfx_v8_0_ring_test_ib,
6553 .insert_nop = amdgpu_ring_insert_nop,
6554 .pad_ib = amdgpu_ring_generic_pad_ib,
6555 .emit_switch_buffer = gfx_v8_ring_emit_sb,
6556 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6559 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6560 .type = AMDGPU_RING_TYPE_COMPUTE,
6562 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6563 .get_rptr = gfx_v8_0_ring_get_rptr,
6564 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6565 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6567 20 + /* gfx_v8_0_ring_emit_gds_switch */
6568 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6569 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6570 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6571 17 + /* gfx_v8_0_ring_emit_vm_flush */
6572 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6573 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6574 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6575 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6576 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6577 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6578 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6579 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6580 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6581 .test_ring = gfx_v8_0_ring_test_ring,
6582 .test_ib = gfx_v8_0_ring_test_ib,
6583 .insert_nop = amdgpu_ring_insert_nop,
6584 .pad_ib = amdgpu_ring_generic_pad_ib,
6587 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6591 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6592 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6594 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6595 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6598 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6599 .set = gfx_v8_0_set_eop_interrupt_state,
6600 .process = gfx_v8_0_eop_irq,
6603 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6604 .set = gfx_v8_0_set_priv_reg_fault_state,
6605 .process = gfx_v8_0_priv_reg_irq,
6608 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6609 .set = gfx_v8_0_set_priv_inst_fault_state,
6610 .process = gfx_v8_0_priv_inst_irq,
6613 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6615 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6616 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6618 adev->gfx.priv_reg_irq.num_types = 1;
6619 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6621 adev->gfx.priv_inst_irq.num_types = 1;
6622 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6625 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6627 switch (adev->asic_type) {
6629 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6633 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6636 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6641 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6643 /* init asci gds info */
6644 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6645 adev->gds.gws.total_size = 64;
6646 adev->gds.oa.total_size = 16;
6648 if (adev->gds.mem.total_size == 64 * 1024) {
6649 adev->gds.mem.gfx_partition_size = 4096;
6650 adev->gds.mem.cs_partition_size = 4096;
6652 adev->gds.gws.gfx_partition_size = 4;
6653 adev->gds.gws.cs_partition_size = 4;
6655 adev->gds.oa.gfx_partition_size = 4;
6656 adev->gds.oa.cs_partition_size = 1;
6658 adev->gds.mem.gfx_partition_size = 1024;
6659 adev->gds.mem.cs_partition_size = 1024;
6661 adev->gds.gws.gfx_partition_size = 16;
6662 adev->gds.gws.cs_partition_size = 16;
6664 adev->gds.oa.gfx_partition_size = 4;
6665 adev->gds.oa.cs_partition_size = 4;
6669 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6677 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6678 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6680 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6683 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6687 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6688 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6690 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6692 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6695 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6697 int i, j, k, counter, active_cu_number = 0;
6698 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6699 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6700 unsigned disable_masks[4 * 2];
6702 memset(cu_info, 0, sizeof(*cu_info));
6704 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6706 mutex_lock(&adev->grbm_idx_mutex);
6707 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6708 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6712 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6714 gfx_v8_0_set_user_cu_inactive_bitmap(
6715 adev, disable_masks[i * 2 + j]);
6716 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6717 cu_info->bitmap[i][j] = bitmap;
6719 for (k = 0; k < 16; k ++) {
6720 if (bitmap & mask) {
6727 active_cu_number += counter;
6728 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6731 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6732 mutex_unlock(&adev->grbm_idx_mutex);
6734 cu_info->number = active_cu_number;
6735 cu_info->ao_cu_mask = ao_cu_mask;
6738 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
6740 .type = AMD_IP_BLOCK_TYPE_GFX,
6744 .funcs = &gfx_v8_0_ip_funcs,
6747 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
6749 .type = AMD_IP_BLOCK_TYPE_GFX,
6753 .funcs = &gfx_v8_0_ip_funcs,