/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
23 #include <linux/firmware.h>
26 #include "amdgpu_gfx.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
44 #include "gca/gfx_8_0_enum.h"
46 #include "uvd/uvd_5_0_d.h"
47 #include "uvd/uvd_5_0_sh_mask.h"
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
/* number of hardware rings exposed by gfx v8 */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* golden GB_ADDR_CONFIG values per ASIC */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* field-packing helpers for the tile-mode and macrotile-mode registers */
#define ARRAY_MODE(x)	((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)	((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)	((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)	((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)	((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)	((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)	((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
69 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
70 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
71 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
72 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
73 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
74 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
76 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
77 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
78 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
79 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
80 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
82 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
83 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
85 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
86 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
87 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
89 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
90 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
91 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
92 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
93 MODULE_FIRMWARE("amdgpu/topaz_mec2.bin");
94 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
96 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
97 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
99 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
100 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
101 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
103 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
105 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
106 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
107 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
108 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
109 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
110 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
111 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
112 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
113 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
114 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
115 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
116 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
117 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
118 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
119 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
120 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
123 static const u32 golden_settings_tonga_a11[] =
125 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
126 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
127 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
128 mmGB_GPU_ID, 0x0000000f, 0x00000000,
129 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
130 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
131 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
132 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
133 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
134 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
135 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
136 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
137 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
138 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
139 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
142 static const u32 tonga_golden_common_all[] =
144 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
145 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
146 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
147 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
148 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
149 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
150 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
151 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
154 static const u32 tonga_mgcg_cgcg_init[] =
156 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
157 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
158 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
159 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
160 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
161 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
162 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
163 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
164 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
165 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
166 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
167 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
168 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
169 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
170 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
171 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
172 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
173 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
174 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
175 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
176 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
177 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
178 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
179 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
180 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
181 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
182 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
183 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
184 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
185 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
186 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
187 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
188 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
189 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
190 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
191 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
192 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
193 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
194 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
195 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
196 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
197 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
198 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
199 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
200 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
201 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
202 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
203 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
204 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
205 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
206 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
207 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
208 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
209 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
210 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
211 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
212 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
213 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
214 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
215 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
216 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
217 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
218 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
219 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
220 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
221 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
222 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
223 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
224 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
225 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
226 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
227 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
228 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
229 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
230 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
233 static const u32 fiji_golden_common_all[] =
235 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
236 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
237 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
238 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
239 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
240 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
241 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
242 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
243 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
244 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
247 static const u32 golden_settings_fiji_a10[] =
249 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
250 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
251 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
252 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
253 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
254 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
255 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
256 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
257 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
258 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
259 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
262 static const u32 fiji_mgcg_cgcg_init[] =
264 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
265 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
266 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
267 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
268 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
269 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
270 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
271 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
272 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
273 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
274 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
275 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
276 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
277 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
278 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
279 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
280 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
281 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
282 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
283 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
284 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
285 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
286 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
287 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
288 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
289 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
290 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
291 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
292 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
293 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
294 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
295 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
296 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
297 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
298 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
301 static const u32 golden_settings_iceland_a11[] =
303 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
304 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
305 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
306 mmGB_GPU_ID, 0x0000000f, 0x00000000,
307 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
308 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
309 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
310 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
311 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
312 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
313 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
314 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
315 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
316 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
317 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
320 static const u32 iceland_golden_common_all[] =
322 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
323 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
324 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
325 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
326 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
327 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
328 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
329 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
332 static const u32 iceland_mgcg_cgcg_init[] =
334 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
335 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
336 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
337 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
338 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
339 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
340 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
341 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
342 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
343 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
344 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
345 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
346 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
347 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
348 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
349 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
350 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
351 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
352 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
353 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
354 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
355 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
356 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
357 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
358 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
359 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
360 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
361 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
362 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
363 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
364 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
365 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
366 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
367 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
368 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
369 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
370 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
371 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
372 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
373 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
374 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
375 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
376 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
377 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
378 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
379 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
380 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
381 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
382 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
383 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
384 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
385 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
386 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
387 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
388 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
389 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
390 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
391 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
392 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
393 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
394 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
395 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
396 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
397 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
400 static const u32 cz_golden_settings_a11[] =
402 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
403 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
404 mmGB_GPU_ID, 0x0000000f, 0x00000000,
405 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
406 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
407 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
408 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
409 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
410 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
411 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
414 static const u32 cz_golden_common_all[] =
416 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
417 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
418 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
419 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
420 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
421 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
422 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
423 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
426 static const u32 cz_mgcg_cgcg_init[] =
428 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
429 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
430 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
431 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
432 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
433 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
434 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
435 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
436 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
437 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
438 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
439 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
440 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
441 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
442 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
443 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
444 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
445 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
446 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
447 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
448 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
449 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
450 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
451 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
452 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
453 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
454 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
455 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
456 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
457 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
458 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
459 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
460 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
461 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
462 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
463 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
464 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
465 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
466 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
467 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
468 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
469 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
470 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
471 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
472 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
473 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
474 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
475 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
476 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
477 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
478 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
479 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
480 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
481 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
482 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
483 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
484 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
485 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
486 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
487 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
488 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
489 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
490 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
491 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
492 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
493 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
494 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
495 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
496 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
497 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
498 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
499 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
500 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
501 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
502 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
505 static const u32 stoney_golden_settings_a11[] =
507 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
508 mmGB_GPU_ID, 0x0000000f, 0x00000000,
509 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
510 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
511 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
512 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
513 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
514 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
515 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
516 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
519 static const u32 stoney_golden_common_all[] =
521 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
522 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
523 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
524 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
525 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
526 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
527 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
528 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
531 static const u32 stoney_mgcg_cgcg_init[] =
533 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
534 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
535 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
536 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
537 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
538 mmATC_MISC_CG, 0xffffffff, 0x000c0200,
/* forward declarations for setup helpers defined later in this file */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
545 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
547 switch (adev->asic_type) {
549 amdgpu_program_register_sequence(adev,
550 iceland_mgcg_cgcg_init,
551 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
552 amdgpu_program_register_sequence(adev,
553 golden_settings_iceland_a11,
554 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
555 amdgpu_program_register_sequence(adev,
556 iceland_golden_common_all,
557 (const u32)ARRAY_SIZE(iceland_golden_common_all));
560 amdgpu_program_register_sequence(adev,
562 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
563 amdgpu_program_register_sequence(adev,
564 golden_settings_fiji_a10,
565 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
566 amdgpu_program_register_sequence(adev,
567 fiji_golden_common_all,
568 (const u32)ARRAY_SIZE(fiji_golden_common_all));
572 amdgpu_program_register_sequence(adev,
573 tonga_mgcg_cgcg_init,
574 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
575 amdgpu_program_register_sequence(adev,
576 golden_settings_tonga_a11,
577 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
578 amdgpu_program_register_sequence(adev,
579 tonga_golden_common_all,
580 (const u32)ARRAY_SIZE(tonga_golden_common_all));
583 amdgpu_program_register_sequence(adev,
585 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
586 amdgpu_program_register_sequence(adev,
587 cz_golden_settings_a11,
588 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
589 amdgpu_program_register_sequence(adev,
590 cz_golden_common_all,
591 (const u32)ARRAY_SIZE(cz_golden_common_all));
594 amdgpu_program_register_sequence(adev,
595 stoney_mgcg_cgcg_init,
596 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
597 amdgpu_program_register_sequence(adev,
598 stoney_golden_settings_a11,
599 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
600 amdgpu_program_register_sequence(adev,
601 stoney_golden_common_all,
602 (const u32)ARRAY_SIZE(stoney_golden_common_all));
609 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
613 adev->gfx.scratch.num_reg = 7;
614 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
615 for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
616 adev->gfx.scratch.free[i] = true;
617 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
621 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
623 struct amdgpu_device *adev = ring->adev;
629 r = amdgpu_gfx_scratch_get(adev, &scratch);
631 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
634 WREG32(scratch, 0xCAFEDEAD);
635 r = amdgpu_ring_lock(ring, 3);
637 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
639 amdgpu_gfx_scratch_free(adev, scratch);
642 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
643 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
644 amdgpu_ring_write(ring, 0xDEADBEEF);
645 amdgpu_ring_unlock_commit(ring);
647 for (i = 0; i < adev->usec_timeout; i++) {
648 tmp = RREG32(scratch);
649 if (tmp == 0xDEADBEEF)
653 if (i < adev->usec_timeout) {
654 DRM_INFO("ring test on %d succeeded in %d usecs\n",
657 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
658 ring->idx, scratch, tmp);
661 amdgpu_gfx_scratch_free(adev, scratch);
665 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
667 struct amdgpu_device *adev = ring->adev;
669 struct fence *f = NULL;
675 r = amdgpu_gfx_scratch_get(adev, &scratch);
677 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
680 WREG32(scratch, 0xCAFEDEAD);
681 memset(&ib, 0, sizeof(ib));
682 r = amdgpu_ib_get(ring, NULL, 256, &ib);
684 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
687 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
688 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
689 ib.ptr[2] = 0xDEADBEEF;
692 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
693 AMDGPU_FENCE_OWNER_UNDEFINED,
698 r = fence_wait(f, false);
700 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
703 for (i = 0; i < adev->usec_timeout; i++) {
704 tmp = RREG32(scratch);
705 if (tmp == 0xDEADBEEF)
709 if (i < adev->usec_timeout) {
710 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
714 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
720 amdgpu_ib_free(adev, &ib);
722 amdgpu_gfx_scratch_free(adev, scratch);
726 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
728 const char *chip_name;
731 struct amdgpu_firmware_info *info = NULL;
732 const struct common_firmware_header *header = NULL;
733 const struct gfx_firmware_header_v1_0 *cp_hdr;
737 switch (adev->asic_type) {
745 chip_name = "carrizo";
751 chip_name = "stoney";
757 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
758 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
761 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
764 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
765 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
766 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
768 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
769 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
772 err = amdgpu_ucode_validate(adev->gfx.me_fw);
775 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
776 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
777 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
779 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
780 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
783 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
786 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
787 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
788 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
790 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
791 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
794 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
795 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
796 adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
797 adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
799 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
800 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
803 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
806 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
807 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
808 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
810 if (adev->asic_type != CHIP_STONEY) {
811 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
812 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
814 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
817 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
818 adev->gfx.mec2_fw->data;
819 adev->gfx.mec2_fw_version =
820 le32_to_cpu(cp_hdr->header.ucode_version);
821 adev->gfx.mec2_feature_version =
822 le32_to_cpu(cp_hdr->ucode_feature_version);
825 adev->gfx.mec2_fw = NULL;
829 if (adev->firmware.smu_load) {
830 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
831 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
832 info->fw = adev->gfx.pfp_fw;
833 header = (const struct common_firmware_header *)info->fw->data;
834 adev->firmware.fw_size +=
835 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
837 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
838 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
839 info->fw = adev->gfx.me_fw;
840 header = (const struct common_firmware_header *)info->fw->data;
841 adev->firmware.fw_size +=
842 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
844 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
845 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
846 info->fw = adev->gfx.ce_fw;
847 header = (const struct common_firmware_header *)info->fw->data;
848 adev->firmware.fw_size +=
849 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
851 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
852 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
853 info->fw = adev->gfx.rlc_fw;
854 header = (const struct common_firmware_header *)info->fw->data;
855 adev->firmware.fw_size +=
856 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
858 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
859 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
860 info->fw = adev->gfx.mec_fw;
861 header = (const struct common_firmware_header *)info->fw->data;
862 adev->firmware.fw_size +=
863 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
865 if (adev->gfx.mec2_fw) {
866 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
867 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
868 info->fw = adev->gfx.mec2_fw;
869 header = (const struct common_firmware_header *)info->fw->data;
870 adev->firmware.fw_size +=
871 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
879 "gfx8: Failed to load firmware \"%s\"\n",
881 release_firmware(adev->gfx.pfp_fw);
882 adev->gfx.pfp_fw = NULL;
883 release_firmware(adev->gfx.me_fw);
884 adev->gfx.me_fw = NULL;
885 release_firmware(adev->gfx.ce_fw);
886 adev->gfx.ce_fw = NULL;
887 release_firmware(adev->gfx.rlc_fw);
888 adev->gfx.rlc_fw = NULL;
889 release_firmware(adev->gfx.mec_fw);
890 adev->gfx.mec_fw = NULL;
891 release_firmware(adev->gfx.mec2_fw);
892 adev->gfx.mec2_fw = NULL;
/* gfx_v8_0_mec_fini - tear down the MEC HPD EOP buffer object.
 *
 * Unpins, unreferences and clears adev->gfx.mec.hpd_eop_obj if it was
 * allocated by gfx_v8_0_mec_init(); a no-op when the BO was never created.
 * NOTE(review): this extract elides some original lines (locals/braces);
 * the leading numbers are original-file line numbers.
 */
897 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
901 if (adev->gfx.mec.hpd_eop_obj) {
902 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
903 if (unlikely(r != 0))
/* reserve failure is warn-only; cleanup appears to continue below —
 * TODO confirm against the complete source */
904 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
905 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
906 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/* drop the last driver reference and forget the pointer so a later
 * mec_init() can re-create the BO */
908 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
909 adev->gfx.mec.hpd_eop_obj = NULL;
913 #define MEC_HPD_SIZE 2048
/* gfx_v8_0_mec_init - allocate, pin and zero the HPD EOP buffer used by
 * the compute micro engine (MEC) queues.
 *
 * Creates adev->gfx.mec.hpd_eop_obj in GTT (lazily, only when NULL),
 * pins it to obtain hpd_eop_gpu_addr, maps it, clears it, then unmaps
 * and unreserves.  Error paths call gfx_v8_0_mec_fini() to undo partial
 * setup.  Returns 0 on success or a negative error code.
 * NOTE(review): this extract elides some original lines (locals, braces,
 * returns); the leading numbers are original-file line numbers.
 */
915 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
921 * we assign only 1 pipe because all other pipes will
/* topology: 1 MEC x 1 pipe, 8 queues per mec*pipe */
924 adev->gfx.mec.num_mec = 1;
925 adev->gfx.mec.num_pipe = 1;
926 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
/* BO size is num_mec * num_pipe * MEC_HPD_SIZE * 2; the purpose of the
 * trailing "* 2" factor is not visible in this extract — TODO confirm */
928 if (adev->gfx.mec.hpd_eop_obj == NULL) {
929 r = amdgpu_bo_create(adev,
930 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
932 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
933 &adev->gfx.mec.hpd_eop_obj);
/* NOTE(review): "HDP EOP" in the three messages below looks like a typo
 * for "HPD EOP" (the object being handled); runtime strings left as-is */
935 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
940 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
941 if (unlikely(r != 0)) {
942 gfx_v8_0_mec_fini(adev);
/* pin in GTT and record the GPU address used to program the queues */
945 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
946 &adev->gfx.mec.hpd_eop_gpu_addr);
948 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
949 gfx_v8_0_mec_fini(adev);
952 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
954 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
955 gfx_v8_0_mec_fini(adev);
/* zero the whole HPD area before the hardware ever sees it */
959 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
961 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
962 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/* vgpr_init_compute_shader - raw GCN (VI) shader machine words.
 *
 * Copied verbatim into the indirect buffer at vgpr_offset by
 * gfx_v8_0_do_edc_gpr_workarounds() and dispatched so that the shader
 * touches/initializes VGPRs as part of the Carrizo EDC workaround.
 * The trailing pair (0xbf8a0000, 0xbf810000) appears to be
 * s_barrier / s_endpgm — TODO confirm against the GCN3 ISA manual.
 */
967 static const u32 vgpr_init_compute_shader[] =
969 0x7e000209, 0x7e020208,
970 0x7e040207, 0x7e060206,
971 0x7e080205, 0x7e0a0204,
972 0x7e0c0203, 0x7e0e0202,
973 0x7e100201, 0x7e120200,
974 0x7e140209, 0x7e160208,
975 0x7e180207, 0x7e1a0206,
976 0x7e1c0205, 0x7e1e0204,
977 0x7e200203, 0x7e220202,
978 0x7e240201, 0x7e260200,
979 0x7e280209, 0x7e2a0208,
980 0x7e2c0207, 0x7e2e0206,
981 0x7e300205, 0x7e320204,
982 0x7e340203, 0x7e360202,
983 0x7e380201, 0x7e3a0200,
984 0x7e3c0209, 0x7e3e0208,
985 0x7e400207, 0x7e420206,
986 0x7e440205, 0x7e460204,
987 0x7e480203, 0x7e4a0202,
988 0x7e4c0201, 0x7e4e0200,
989 0x7e500209, 0x7e520208,
990 0x7e540207, 0x7e560206,
991 0x7e580205, 0x7e5a0204,
992 0x7e5c0203, 0x7e5e0202,
993 0x7e600201, 0x7e620200,
994 0x7e640209, 0x7e660208,
995 0x7e680207, 0x7e6a0206,
996 0x7e6c0205, 0x7e6e0204,
997 0x7e700203, 0x7e720202,
998 0x7e740201, 0x7e760200,
999 0x7e780209, 0x7e7a0208,
1000 0x7e7c0207, 0x7e7e0206,
1001 0xbf8a0000, 0xbf810000,
/* sgpr_init_compute_shader - raw GCN (VI) shader machine words.
 *
 * Copied into the indirect buffer at sgpr_offset by
 * gfx_v8_0_do_edc_gpr_workarounds() and dispatched (twice, with the
 * sgpr1/sgpr2 register sets) so the shader touches/initializes SGPRs
 * for the Carrizo EDC workaround.  The 0xbf8a0000/0xbf810000 tail
 * appears to be s_barrier / s_endpgm — TODO confirm against the GCN3
 * ISA manual.
 */
1004 static const u32 sgpr_init_compute_shader[] =
1006 0xbe8a0100, 0xbe8c0102,
1007 0xbe8e0104, 0xbe900106,
1008 0xbe920108, 0xbe940100,
1009 0xbe960102, 0xbe980104,
1010 0xbe9a0106, 0xbe9c0108,
1011 0xbe9e0100, 0xbea00102,
1012 0xbea20104, 0xbea40106,
1013 0xbea60108, 0xbea80100,
1014 0xbeaa0102, 0xbeac0104,
1015 0xbeae0106, 0xbeb00108,
1016 0xbeb20100, 0xbeb40102,
1017 0xbeb60104, 0xbeb80106,
1018 0xbeba0108, 0xbebc0100,
1019 0xbebe0102, 0xbec00104,
1020 0xbec20106, 0xbec40108,
1021 0xbec60100, 0xbec80102,
1022 0xbee60004, 0xbee70005,
1023 0xbeea0006, 0xbeeb0007,
1024 0xbee80008, 0xbee90009,
1025 0xbefc0000, 0xbf8a0000,
1026 0xbf810000, 0x00000000,
/* vgpr_init_regs - (register, value) pairs, consumed two at a time by
 * the PACKET3_SET_SH_REG loop in gfx_v8_0_do_edc_gpr_workarounds().
 * Programs the compute dispatch state (static thread management,
 * resource limits, thread counts, PGM_RSRC2 and COMPUTE_USER_DATA_*)
 * for the VGPR-init shader pass.
 */
1029 static const u32 vgpr_init_regs[] =
1031 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1032 mmCOMPUTE_RESOURCE_LIMITS, 0,
1033 mmCOMPUTE_NUM_THREAD_X, 256*4,
1034 mmCOMPUTE_NUM_THREAD_Y, 1,
1035 mmCOMPUTE_NUM_THREAD_Z, 1,
1036 mmCOMPUTE_PGM_RSRC2, 20,
1037 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1038 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1039 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1040 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1041 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1042 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1043 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1044 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1045 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1046 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/* sgpr1_init_regs - (register, value) pairs for the first SGPR-init
 * dispatch in gfx_v8_0_do_edc_gpr_workarounds().  Differs from
 * sgpr2_init_regs only in the STATIC_THREAD_MGMT_SE0 mask (0x0f here vs
 * 0xf0 there) — presumably selecting a different set of CUs so that
 * between the two passes all SGPRs get initialized; confirm against the
 * full driver source.
 */
1049 static const u32 sgpr1_init_regs[] =
1051 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1052 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1053 mmCOMPUTE_NUM_THREAD_X, 256*5,
1054 mmCOMPUTE_NUM_THREAD_Y, 1,
1055 mmCOMPUTE_NUM_THREAD_Z, 1,
1056 mmCOMPUTE_PGM_RSRC2, 20,
1057 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1058 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1059 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1060 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1061 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1062 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1063 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1064 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1065 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1066 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/* sgpr2_init_regs - (register, value) pairs for the second SGPR-init
 * dispatch in gfx_v8_0_do_edc_gpr_workarounds().  Identical to
 * sgpr1_init_regs except STATIC_THREAD_MGMT_SE0 = 0xf0 (the complementary
 * mask to sgpr1's 0x0f).
 */
1069 static const u32 sgpr2_init_regs[] =
1071 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1072 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1073 mmCOMPUTE_NUM_THREAD_X, 256*5,
1074 mmCOMPUTE_NUM_THREAD_Y, 1,
1075 mmCOMPUTE_NUM_THREAD_Z, 1,
1076 mmCOMPUTE_PGM_RSRC2, 20,
1077 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1078 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1079 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1080 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1081 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1082 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1083 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1084 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1085 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1086 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/* sec_ded_counter_registers - EDC SEC/DED error-counter registers that
 * gfx_v8_0_do_edc_gpr_workarounds() reads back (RREG32) at the end of
 * the workaround to clear the counters.
 * NOTE(review): this extract elides several entries of the original
 * list (the original line numbers jump).
 */
1089 static const u32 sec_ded_counter_registers[] =
1092 mmCPC_EDC_SCRATCH_CNT,
1093 mmCPC_EDC_UCODE_CNT,
1100 mmDC_EDC_CSINVOC_CNT,
1101 mmDC_EDC_RESTORE_CNT,
1107 mmSQC_ATC_EDC_GATCL1_CNT,
1113 mmTCP_ATC_EDC_GATCL1_CNT,
/* gfx_v8_0_do_edc_gpr_workarounds - Carrizo-only EDC GPR init workaround.
 *
 * Builds one indirect buffer that runs three compute dispatches (one
 * VGPR-init pass and two SGPR-init passes, each followed by a CS partial
 * flush), submits it on compute ring 0, waits for the fence, then enables
 * EDC modes (GB_EDC_MODE / CC_GC_EDC_CONFIG) and reads the SEC/DED
 * counter registers back to clear them.  Returns 0 on success or a
 * negative error code from IB allocation/submit/fence wait.
 * NOTE(review): this extract elides some original lines (early returns,
 * braces, some locals); leading numbers are original-file line numbers.
 */
1118 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1120 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1121 struct amdgpu_ib ib;
1122 struct fence *f = NULL;
1125 unsigned total_size, vgpr_offset, sgpr_offset;
1128 /* only supported on CZ */
1129 if (adev->asic_type != CHIP_CARRIZO)
1132 /* bail if the compute ring is not ready */
/* save EDC mode and disable it while the init shaders run */
1136 tmp = RREG32(mmGB_EDC_MODE);
1137 WREG32(mmGB_EDC_MODE, 0);
/* per dispatch: 3 dwords per SET_SH_REG pair, + 4 (PGM_LO/HI write)
 * + 5 (dispatch) + 2 (event write), all in dwords then *4 for bytes */
1140 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1142 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1144 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
/* shaders live after the packets, each 256-byte aligned for PGM_LO>>8 */
1145 total_size = ALIGN(total_size, 256);
1146 vgpr_offset = total_size;
1147 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1148 sgpr_offset = total_size;
1149 total_size += sizeof(sgpr_init_compute_shader);
1151 /* allocate an indirect buffer to put the commands in */
1152 memset(&ib, 0, sizeof(ib));
1153 r = amdgpu_ib_get(ring, NULL, total_size, &ib);
1155 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1159 /* load the compute shaders */
1160 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1161 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1163 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1164 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1166 /* init the ib length to 0 */
/* ---- pass 1: VGPR init dispatch ---- */
1170 /* write the register state for the compute dispatch */
1171 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1172 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1173 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1174 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1176 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1177 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1178 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1179 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1180 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1181 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1183 /* write dispatch packet */
1184 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1185 ib.ptr[ib.length_dw++] = 8; /* x */
1186 ib.ptr[ib.length_dw++] = 1; /* y */
1187 ib.ptr[ib.length_dw++] = 1; /* z */
1188 ib.ptr[ib.length_dw++] =
1189 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1191 /* write CS partial flush packet */
1192 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1193 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- pass 2: SGPR init dispatch (first CU mask) ---- */
1196 /* write the register state for the compute dispatch */
1197 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1198 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1199 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1200 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1202 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1203 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1204 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1205 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1206 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1207 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1209 /* write dispatch packet */
1210 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1211 ib.ptr[ib.length_dw++] = 8; /* x */
1212 ib.ptr[ib.length_dw++] = 1; /* y */
1213 ib.ptr[ib.length_dw++] = 1; /* z */
1214 ib.ptr[ib.length_dw++] =
1215 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1217 /* write CS partial flush packet */
1218 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1219 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- pass 3: SGPR init dispatch (complementary CU mask) ---- */
1222 /* write the register state for the compute dispatch */
1223 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1224 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1225 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1226 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1228 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1229 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1230 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1231 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1232 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1233 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1235 /* write dispatch packet */
1236 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1237 ib.ptr[ib.length_dw++] = 8; /* x */
1238 ib.ptr[ib.length_dw++] = 1; /* y */
1239 ib.ptr[ib.length_dw++] = 1; /* z */
1240 ib.ptr[ib.length_dw++] =
1241 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1243 /* write CS partial flush packet */
1244 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1245 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1247 /* schedule the ib on the ring */
1248 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
1249 AMDGPU_FENCE_OWNER_UNDEFINED,
1252 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1256 /* wait for the GPU to finish processing the IB */
1257 r = fence_wait(f, false);
1259 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
/* re-enable EDC: DED_MODE=2, PROP_FED=1 on the saved GB_EDC_MODE value */
1263 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1264 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1265 WREG32(mmGB_EDC_MODE, tmp);
/* clear DIS_EDC and force bit 0 on in CC_GC_EDC_CONFIG */
1267 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1268 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1269 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1272 /* read back registers to clear the counters */
1273 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1274 RREG32(sec_ded_counter_registers[i]);
1278 amdgpu_ib_free(adev, &ib);
/* gfx_v8_0_gpu_early_init - fill adev->gfx.config with per-ASIC GFX
 * topology limits and compute the final GB_ADDR_CONFIG value.
 *
 * Selects shader-engine/pipe/CU/backend counts per asic_type (and, for
 * the APU cases, per PCI revision), then derives mem_row_size_in_kb —
 * from the DIMM address-map fuses on APUs, or from MC_ARB_RAMCFG NOOFCOLS
 * on dGPUs — and patches ROW_SIZE into gb_addr_config.
 * NOTE(review): this extract elides the switch case labels and some
 * lines; leading numbers are original-file line numbers.  Chip names in
 * the notes below are inferred from the *_GB_ADDR_CONFIG_GOLDEN constant
 * each branch assigns — confirm against the complete source.
 */
1283 static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1286 u32 mc_shared_chmap, mc_arb_ramcfg;
1287 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1290 switch (adev->asic_type) {
/* (elided case label — golden value below is TOPAZ, presumably Topaz) */
1292 adev->gfx.config.max_shader_engines = 1;
1293 adev->gfx.config.max_tile_pipes = 2;
1294 adev->gfx.config.max_cu_per_sh = 6;
1295 adev->gfx.config.max_sh_per_se = 1;
1296 adev->gfx.config.max_backends_per_se = 2;
1297 adev->gfx.config.max_texture_channel_caches = 2;
1298 adev->gfx.config.max_gprs = 256;
1299 adev->gfx.config.max_gs_threads = 32;
1300 adev->gfx.config.max_hw_contexts = 8;
1302 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1303 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1304 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1305 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1306 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
/* (elided case label — 4 SE / 16 pipes, TONGA golden) */
1309 adev->gfx.config.max_shader_engines = 4;
1310 adev->gfx.config.max_tile_pipes = 16;
1311 adev->gfx.config.max_cu_per_sh = 16;
1312 adev->gfx.config.max_sh_per_se = 1;
1313 adev->gfx.config.max_backends_per_se = 4;
1314 adev->gfx.config.max_texture_channel_caches = 16;
1315 adev->gfx.config.max_gprs = 256;
1316 adev->gfx.config.max_gs_threads = 32;
1317 adev->gfx.config.max_hw_contexts = 8;
1319 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1320 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1321 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1322 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1323 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* (elided case label — 4 SE / 8 pipes, TONGA golden) */
1326 adev->gfx.config.max_shader_engines = 4;
1327 adev->gfx.config.max_tile_pipes = 8;
1328 adev->gfx.config.max_cu_per_sh = 8;
1329 adev->gfx.config.max_sh_per_se = 1;
1330 adev->gfx.config.max_backends_per_se = 2;
1331 adev->gfx.config.max_texture_channel_caches = 8;
1332 adev->gfx.config.max_gprs = 256;
1333 adev->gfx.config.max_gs_threads = 32;
1334 adev->gfx.config.max_hw_contexts = 8;
1336 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1337 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1338 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1339 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1340 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* (elided case label — APU branch with per-revision CU count, CARRIZO
 * golden; presumably Carrizo) */
1343 adev->gfx.config.max_shader_engines = 1;
1344 adev->gfx.config.max_tile_pipes = 2;
1345 adev->gfx.config.max_sh_per_se = 1;
1346 adev->gfx.config.max_backends_per_se = 2;
/* CU count per SH depends on the SKU (PCI revision); the revision
 * ranges for each value are elided in this extract */
1348 switch (adev->pdev->revision) {
1356 adev->gfx.config.max_cu_per_sh = 8;
1366 adev->gfx.config.max_cu_per_sh = 6;
1373 adev->gfx.config.max_cu_per_sh = 6;
1382 adev->gfx.config.max_cu_per_sh = 4;
1386 adev->gfx.config.max_texture_channel_caches = 2;
1387 adev->gfx.config.max_gprs = 256;
1388 adev->gfx.config.max_gs_threads = 32;
1389 adev->gfx.config.max_hw_contexts = 8;
1391 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1392 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1393 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1394 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1395 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
/* (elided case label — second APU branch, single backend, CARRIZO
 * golden; presumably Stoney) */
1398 adev->gfx.config.max_shader_engines = 1;
1399 adev->gfx.config.max_tile_pipes = 2;
1400 adev->gfx.config.max_sh_per_se = 1;
1401 adev->gfx.config.max_backends_per_se = 1;
1403 switch (adev->pdev->revision) {
1410 adev->gfx.config.max_cu_per_sh = 3;
1416 adev->gfx.config.max_cu_per_sh = 2;
1420 adev->gfx.config.max_texture_channel_caches = 2;
1421 adev->gfx.config.max_gprs = 256;
1422 adev->gfx.config.max_gs_threads = 16;
1423 adev->gfx.config.max_hw_contexts = 8;
1425 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1426 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1427 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1428 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1429 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
/* (elided label — fallback branch, 2 SE / 4 pipes, TONGA golden) */
1432 adev->gfx.config.max_shader_engines = 2;
1433 adev->gfx.config.max_tile_pipes = 4;
1434 adev->gfx.config.max_cu_per_sh = 2;
1435 adev->gfx.config.max_sh_per_se = 1;
1436 adev->gfx.config.max_backends_per_se = 2;
1437 adev->gfx.config.max_texture_channel_caches = 4;
1438 adev->gfx.config.max_gprs = 256;
1439 adev->gfx.config.max_gs_threads = 32;
1440 adev->gfx.config.max_hw_contexts = 8;
1442 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1443 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1444 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1445 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1446 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* mc_shared_chmap is read but not otherwise used in the visible code */
1450 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1451 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1452 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1454 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1455 adev->gfx.config.mem_max_burst_length_bytes = 256;
/* APUs: derive row size from the fused DIMM bank address mappings */
1456 if (adev->flags & AMD_IS_APU) {
1457 /* Get memory bank mapping mode. */
1458 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1459 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1460 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1462 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1463 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1464 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1466 /* Validate settings in case only one DIMM installed. */
1467 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1468 dimm00_addr_map = 0;
1469 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1470 dimm01_addr_map = 0;
1471 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1472 dimm10_addr_map = 0;
1473 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1474 dimm11_addr_map = 0;
1476 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1477 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be larger one. */
1478 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1479 adev->gfx.config.mem_row_size_in_kb = 2;
1481 adev->gfx.config.mem_row_size_in_kb = 1;
/* dGPUs: row size from MC_ARB_RAMCFG NOOFCOLS, capped at 4KB */
1483 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1484 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1485 if (adev->gfx.config.mem_row_size_in_kb > 4)
1486 adev->gfx.config.mem_row_size_in_kb = 4;
1489 adev->gfx.config.shader_engine_tile_size = 32;
1490 adev->gfx.config.num_gpus = 1;
1491 adev->gfx.config.multi_gpu_tile_size = 64;
1493 /* fix up row size */
1494 switch (adev->gfx.config.mem_row_size_in_kb) {
1497 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1500 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1503 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1506 adev->gfx.config.gb_addr_config = gb_addr_config;
/* gfx_v8_0_sw_init - IP-block sw_init hook for GFX v8.
 *
 * Registers the EOP (181), privileged-register (184) and
 * privileged-instruction (185) interrupt sources, loads microcode,
 * allocates the MEC HPD EOP buffer, initializes the GFX and compute
 * rings, reserves GDS/GWS/OA buffer objects, and runs
 * gfx_v8_0_gpu_early_init().  Returns 0 on success or a negative error
 * code.
 * NOTE(review): this extract elides error-check lines and braces;
 * leading numbers are original-file line numbers.
 */
1509 static int gfx_v8_0_sw_init(void *handle)
1512 struct amdgpu_ring *ring;
1513 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* EOP event interrupt */
1516 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1520 /* Privileged reg */
1521 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1525 /* Privileged inst */
1526 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1530 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1532 gfx_v8_0_scratch_init(adev);
1534 r = gfx_v8_0_init_microcode(adev);
1536 DRM_ERROR("Failed to load gfx firmware!\n");
1540 r = gfx_v8_0_mec_init(adev);
1542 DRM_ERROR("Failed to init MEC BOs!\n");
1546 /* set up the gfx ring */
1547 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1548 ring = &adev->gfx.gfx_ring[i];
1549 ring->ring_obj = NULL;
1550 sprintf(ring->name, "gfx");
1551 /* no gfx doorbells on iceland */
1552 if (adev->asic_type != CHIP_TOPAZ) {
1553 ring->use_doorbell = true;
1554 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
/* 1 MB ring, NOP-padded with type-3 NOP packets */
1557 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1558 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1559 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1560 AMDGPU_RING_TYPE_GFX);
1565 /* set up the compute queues */
1566 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1569 /* max 32 queues per MEC */
1570 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1571 DRM_ERROR("Too many (%d) compute rings!\n", i);
1574 ring = &adev->gfx.compute_ring[i];
1575 ring->ring_obj = NULL;
1576 ring->use_doorbell = true;
1577 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1578 ring->me = 1; /* first MEC */
/* queue index wraps at 8 queues per pipe; the ring->pipe assignment
 * is elided in this extract */
1580 ring->queue = i % 8;
1581 sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
1582 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1583 /* type-2 packets are deprecated on MEC, use type-3 instead */
1584 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1585 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1586 &adev->gfx.eop_irq, irq_type,
1587 AMDGPU_RING_TYPE_COMPUTE);
1592 /* reserve GDS, GWS and OA resource for gfx */
1593 r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1595 AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1596 NULL, &adev->gds.gds_gfx_bo);
1600 r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1602 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1603 NULL, &adev->gds.gws_gfx_bo);
1607 r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1609 AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1610 NULL, &adev->gds.oa_gfx_bo);
/* 32 KB of constant-engine RAM */
1614 adev->gfx.ce_ram_size = 0x8000;
1616 gfx_v8_0_gpu_early_init(adev);
/* gfx_v8_0_sw_fini - IP-block sw_fini hook for GFX v8.
 *
 * Releases everything gfx_v8_0_sw_init() set up, in reverse order:
 * GDS/GWS/OA buffer objects, the GFX and compute rings, then the MEC
 * HPD EOP buffer.
 * NOTE(review): this extract elides locals/braces/return; leading
 * numbers are original-file line numbers.
 */
1621 static int gfx_v8_0_sw_fini(void *handle)
1624 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1626 amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1627 amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1628 amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1630 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1631 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1632 for (i = 0; i < adev->gfx.num_compute_rings; i++)
1633 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1635 gfx_v8_0_mec_fini(adev);
1640 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1642 const u32 num_tile_mode_states = 32;
1643 const u32 num_secondary_tile_mode_states = 16;
1644 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1646 switch (adev->gfx.config.mem_row_size_in_kb) {
1648 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1652 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1655 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1659 switch (adev->asic_type) {
1661 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1662 switch (reg_offset) {
1664 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1665 PIPE_CONFIG(ADDR_SURF_P2) |
1666 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1667 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1670 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1671 PIPE_CONFIG(ADDR_SURF_P2) |
1672 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1673 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1676 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1677 PIPE_CONFIG(ADDR_SURF_P2) |
1678 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1679 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1682 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1683 PIPE_CONFIG(ADDR_SURF_P2) |
1684 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1685 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1688 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1689 PIPE_CONFIG(ADDR_SURF_P2) |
1690 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1691 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1694 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1695 PIPE_CONFIG(ADDR_SURF_P2) |
1696 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1697 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1700 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1701 PIPE_CONFIG(ADDR_SURF_P2) |
1702 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1703 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1706 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1707 PIPE_CONFIG(ADDR_SURF_P2));
1710 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1711 PIPE_CONFIG(ADDR_SURF_P2) |
1712 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1713 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1716 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1717 PIPE_CONFIG(ADDR_SURF_P2) |
1718 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1719 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1722 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1723 PIPE_CONFIG(ADDR_SURF_P2) |
1724 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1725 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1728 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1729 PIPE_CONFIG(ADDR_SURF_P2) |
1730 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1731 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1734 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1735 PIPE_CONFIG(ADDR_SURF_P2) |
1736 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1737 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1740 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1741 PIPE_CONFIG(ADDR_SURF_P2) |
1742 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1743 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1746 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1747 PIPE_CONFIG(ADDR_SURF_P2) |
1748 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1749 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1752 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1753 PIPE_CONFIG(ADDR_SURF_P2) |
1754 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1755 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1758 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1759 PIPE_CONFIG(ADDR_SURF_P2) |
1760 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1761 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1764 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1765 PIPE_CONFIG(ADDR_SURF_P2) |
1766 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1767 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1770 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1771 PIPE_CONFIG(ADDR_SURF_P2) |
1772 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1773 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1776 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1777 PIPE_CONFIG(ADDR_SURF_P2) |
1778 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1779 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1782 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1783 PIPE_CONFIG(ADDR_SURF_P2) |
1784 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1785 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1788 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1789 PIPE_CONFIG(ADDR_SURF_P2) |
1790 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1791 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1794 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1795 PIPE_CONFIG(ADDR_SURF_P2) |
1796 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1797 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1800 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1801 PIPE_CONFIG(ADDR_SURF_P2) |
1802 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1803 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1806 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1807 PIPE_CONFIG(ADDR_SURF_P2) |
1808 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1809 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1812 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1813 PIPE_CONFIG(ADDR_SURF_P2) |
1814 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1815 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1827 adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1828 WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1830 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1831 switch (reg_offset) {
1833 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1834 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1835 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1836 NUM_BANKS(ADDR_SURF_8_BANK));
1839 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1840 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1841 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1842 NUM_BANKS(ADDR_SURF_8_BANK));
1845 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1846 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1847 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1848 NUM_BANKS(ADDR_SURF_8_BANK));
1851 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1852 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1853 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1854 NUM_BANKS(ADDR_SURF_8_BANK));
1857 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1858 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1859 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1860 NUM_BANKS(ADDR_SURF_8_BANK));
1863 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1864 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1865 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1866 NUM_BANKS(ADDR_SURF_8_BANK));
1869 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1870 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1871 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1872 NUM_BANKS(ADDR_SURF_8_BANK));
1875 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1876 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1877 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1878 NUM_BANKS(ADDR_SURF_16_BANK));
1881 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1882 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1883 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1884 NUM_BANKS(ADDR_SURF_16_BANK));
1887 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1888 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1889 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1890 NUM_BANKS(ADDR_SURF_16_BANK));
1893 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1894 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1895 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1896 NUM_BANKS(ADDR_SURF_16_BANK));
1899 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1900 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1901 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1902 NUM_BANKS(ADDR_SURF_16_BANK));
1905 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1906 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1907 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1908 NUM_BANKS(ADDR_SURF_16_BANK));
1911 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1912 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1913 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1914 NUM_BANKS(ADDR_SURF_8_BANK));
1923 adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1924 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1927 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1928 switch (reg_offset) {
1930 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1931 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1932 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1933 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1936 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1937 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1938 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1939 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1942 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1943 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1944 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1945 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1948 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1949 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1950 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1951 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1954 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1955 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1956 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1957 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1960 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1961 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1962 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1963 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1966 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1967 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1968 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1969 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1972 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1973 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1974 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1975 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1978 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1979 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1982 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1983 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1984 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1985 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1988 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1989 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1990 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1991 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1994 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1995 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1996 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1997 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2000 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2001 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2002 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2003 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2006 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2007 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2008 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2009 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2012 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2013 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2014 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2015 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2018 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2019 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2020 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2021 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2024 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2025 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2026 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2027 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2030 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2031 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2032 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2033 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2036 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2037 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2038 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2039 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2042 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2043 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2044 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2045 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2048 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2049 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2050 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2051 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2054 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2055 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2056 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2057 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2060 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2061 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2062 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2063 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2066 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2067 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2068 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2069 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2072 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2073 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2074 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2075 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2078 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2079 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2080 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2081 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2084 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2085 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2086 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2087 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2090 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2091 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2092 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2093 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2096 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2097 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2098 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2099 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2102 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2103 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2104 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2105 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2108 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2109 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2110 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2111 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2117 adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
2118 WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
2120 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2121 switch (reg_offset) {
2123 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2124 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2125 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2126 NUM_BANKS(ADDR_SURF_8_BANK));
2129 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2130 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2131 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2132 NUM_BANKS(ADDR_SURF_8_BANK));
2135 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2136 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2137 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2138 NUM_BANKS(ADDR_SURF_8_BANK));
2141 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2142 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2143 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2144 NUM_BANKS(ADDR_SURF_8_BANK));
2147 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2148 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2149 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2150 NUM_BANKS(ADDR_SURF_8_BANK));
2153 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2154 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2155 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2156 NUM_BANKS(ADDR_SURF_8_BANK));
2159 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2160 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2161 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2162 NUM_BANKS(ADDR_SURF_8_BANK));
2165 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2166 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2167 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2168 NUM_BANKS(ADDR_SURF_8_BANK));
2171 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2172 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2173 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2174 NUM_BANKS(ADDR_SURF_8_BANK));
2177 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2178 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2179 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2180 NUM_BANKS(ADDR_SURF_8_BANK));
2183 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2184 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2185 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2186 NUM_BANKS(ADDR_SURF_8_BANK));
2189 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2190 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2191 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2192 NUM_BANKS(ADDR_SURF_8_BANK));
2195 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2196 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2197 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2198 NUM_BANKS(ADDR_SURF_8_BANK));
2201 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2202 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2203 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2204 NUM_BANKS(ADDR_SURF_4_BANK));
2213 adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
2214 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
2218 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2219 switch (reg_offset) {
2221 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2222 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2223 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2224 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2227 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2228 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2229 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2230 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2233 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2234 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2235 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2236 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2239 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2240 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2241 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2242 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2245 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2246 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2247 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2248 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2251 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2252 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2253 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2254 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2257 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2258 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2259 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2260 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2263 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2264 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2265 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2266 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2269 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2270 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2273 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2274 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2275 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2276 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2279 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2281 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2285 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2286 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2287 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2288 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2291 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2292 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2293 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2297 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2298 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2299 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2303 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2304 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2305 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2309 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2310 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2311 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2312 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2315 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2316 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2317 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2318 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2321 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2322 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2323 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2327 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2328 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2329 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2330 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2333 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2334 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2335 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2336 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2339 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2340 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2341 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2342 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2345 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2346 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2347 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2348 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2351 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2352 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2353 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2354 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2357 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2358 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2359 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2363 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2364 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2365 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2369 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2370 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2371 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2375 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2376 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2377 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2381 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2382 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2383 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2387 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2388 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2389 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2390 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2393 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2394 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2395 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2396 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2399 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2400 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2401 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2402 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2408 adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
2409 WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
2411 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2412 switch (reg_offset) {
2414 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2417 NUM_BANKS(ADDR_SURF_16_BANK));
2420 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2421 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2422 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2423 NUM_BANKS(ADDR_SURF_16_BANK));
2426 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2427 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2428 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2429 NUM_BANKS(ADDR_SURF_16_BANK));
2432 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2433 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2434 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2435 NUM_BANKS(ADDR_SURF_16_BANK));
2438 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2440 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2441 NUM_BANKS(ADDR_SURF_16_BANK));
2444 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2446 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2447 NUM_BANKS(ADDR_SURF_16_BANK));
2450 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2452 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2453 NUM_BANKS(ADDR_SURF_16_BANK));
2456 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2459 NUM_BANKS(ADDR_SURF_16_BANK));
2462 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2464 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2465 NUM_BANKS(ADDR_SURF_16_BANK));
2468 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471 NUM_BANKS(ADDR_SURF_16_BANK));
2474 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2477 NUM_BANKS(ADDR_SURF_16_BANK));
2480 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483 NUM_BANKS(ADDR_SURF_8_BANK));
2486 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2489 NUM_BANKS(ADDR_SURF_4_BANK));
2492 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2494 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2495 NUM_BANKS(ADDR_SURF_4_BANK));
2504 adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
2505 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
2509 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2510 switch (reg_offset) {
2512 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513 PIPE_CONFIG(ADDR_SURF_P2) |
2514 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2515 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2518 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2519 PIPE_CONFIG(ADDR_SURF_P2) |
2520 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2521 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2524 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525 PIPE_CONFIG(ADDR_SURF_P2) |
2526 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2527 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2530 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2531 PIPE_CONFIG(ADDR_SURF_P2) |
2532 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2533 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537 PIPE_CONFIG(ADDR_SURF_P2) |
2538 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2539 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2542 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2543 PIPE_CONFIG(ADDR_SURF_P2) |
2544 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2545 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2548 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2549 PIPE_CONFIG(ADDR_SURF_P2) |
2550 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2551 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2554 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2555 PIPE_CONFIG(ADDR_SURF_P2));
2558 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2559 PIPE_CONFIG(ADDR_SURF_P2) |
2560 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2561 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2564 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565 PIPE_CONFIG(ADDR_SURF_P2) |
2566 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2567 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571 PIPE_CONFIG(ADDR_SURF_P2) |
2572 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2573 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2576 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577 PIPE_CONFIG(ADDR_SURF_P2) |
2578 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2579 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2582 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583 PIPE_CONFIG(ADDR_SURF_P2) |
2584 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2585 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2588 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2589 PIPE_CONFIG(ADDR_SURF_P2) |
2590 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2591 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2594 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2595 PIPE_CONFIG(ADDR_SURF_P2) |
2596 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2597 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2600 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2601 PIPE_CONFIG(ADDR_SURF_P2) |
2602 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2603 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2606 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2607 PIPE_CONFIG(ADDR_SURF_P2) |
2608 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2609 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2612 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2613 PIPE_CONFIG(ADDR_SURF_P2) |
2614 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2615 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2618 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2619 PIPE_CONFIG(ADDR_SURF_P2) |
2620 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2621 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2624 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2625 PIPE_CONFIG(ADDR_SURF_P2) |
2626 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2627 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2630 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2631 PIPE_CONFIG(ADDR_SURF_P2) |
2632 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2633 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2636 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2637 PIPE_CONFIG(ADDR_SURF_P2) |
2638 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2639 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2642 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2643 PIPE_CONFIG(ADDR_SURF_P2) |
2644 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2645 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2648 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2649 PIPE_CONFIG(ADDR_SURF_P2) |
2650 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2651 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2654 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2655 PIPE_CONFIG(ADDR_SURF_P2) |
2656 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2657 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2660 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2661 PIPE_CONFIG(ADDR_SURF_P2) |
2662 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2663 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2675 adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
2676 WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
2678 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2679 switch (reg_offset) {
2681 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2682 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2683 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2684 NUM_BANKS(ADDR_SURF_8_BANK));
2687 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2688 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2689 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2690 NUM_BANKS(ADDR_SURF_8_BANK));
2693 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2694 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2695 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2696 NUM_BANKS(ADDR_SURF_8_BANK));
2699 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2700 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2701 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2702 NUM_BANKS(ADDR_SURF_8_BANK));
2705 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2706 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2707 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2708 NUM_BANKS(ADDR_SURF_8_BANK));
2711 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2712 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2713 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2714 NUM_BANKS(ADDR_SURF_8_BANK));
2717 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2718 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2719 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2720 NUM_BANKS(ADDR_SURF_8_BANK));
2723 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2724 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2725 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2726 NUM_BANKS(ADDR_SURF_16_BANK));
2729 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2730 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2731 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2732 NUM_BANKS(ADDR_SURF_16_BANK));
2735 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2736 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2737 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2738 NUM_BANKS(ADDR_SURF_16_BANK));
2741 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2742 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2743 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2744 NUM_BANKS(ADDR_SURF_16_BANK));
2747 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2749 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2750 NUM_BANKS(ADDR_SURF_16_BANK));
2753 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2754 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2755 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2756 NUM_BANKS(ADDR_SURF_16_BANK));
2759 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2761 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2762 NUM_BANKS(ADDR_SURF_8_BANK));
2771 adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
2772 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
2777 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2778 switch (reg_offset) {
2780 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781 PIPE_CONFIG(ADDR_SURF_P2) |
2782 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2783 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2786 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2787 PIPE_CONFIG(ADDR_SURF_P2) |
2788 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2789 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2792 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2793 PIPE_CONFIG(ADDR_SURF_P2) |
2794 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2795 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2798 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2799 PIPE_CONFIG(ADDR_SURF_P2) |
2800 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2801 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2804 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2805 PIPE_CONFIG(ADDR_SURF_P2) |
2806 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2807 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2810 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2811 PIPE_CONFIG(ADDR_SURF_P2) |
2812 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2813 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2816 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2817 PIPE_CONFIG(ADDR_SURF_P2) |
2818 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2819 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2822 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2823 PIPE_CONFIG(ADDR_SURF_P2));
2826 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2827 PIPE_CONFIG(ADDR_SURF_P2) |
2828 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2829 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2832 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2833 PIPE_CONFIG(ADDR_SURF_P2) |
2834 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2835 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2838 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2839 PIPE_CONFIG(ADDR_SURF_P2) |
2840 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2841 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2844 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2845 PIPE_CONFIG(ADDR_SURF_P2) |
2846 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2847 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2850 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2851 PIPE_CONFIG(ADDR_SURF_P2) |
2852 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2853 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2856 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2857 PIPE_CONFIG(ADDR_SURF_P2) |
2858 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2859 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2862 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2863 PIPE_CONFIG(ADDR_SURF_P2) |
2864 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2865 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2868 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2869 PIPE_CONFIG(ADDR_SURF_P2) |
2870 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2871 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2874 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2875 PIPE_CONFIG(ADDR_SURF_P2) |
2876 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2877 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2880 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2881 PIPE_CONFIG(ADDR_SURF_P2) |
2882 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2883 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2886 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2887 PIPE_CONFIG(ADDR_SURF_P2) |
2888 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2889 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2892 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2893 PIPE_CONFIG(ADDR_SURF_P2) |
2894 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2895 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2898 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2899 PIPE_CONFIG(ADDR_SURF_P2) |
2900 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2901 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2904 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2905 PIPE_CONFIG(ADDR_SURF_P2) |
2906 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2907 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2910 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2911 PIPE_CONFIG(ADDR_SURF_P2) |
2912 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2913 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2916 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917 PIPE_CONFIG(ADDR_SURF_P2) |
2918 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2919 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2922 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2923 PIPE_CONFIG(ADDR_SURF_P2) |
2924 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2925 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2928 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2929 PIPE_CONFIG(ADDR_SURF_P2) |
2930 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2931 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2943 adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
2944 WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
2946 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2947 switch (reg_offset) {
2949 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2950 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2951 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2952 NUM_BANKS(ADDR_SURF_8_BANK));
2955 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2957 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2958 NUM_BANKS(ADDR_SURF_8_BANK));
2961 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2962 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2963 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2964 NUM_BANKS(ADDR_SURF_8_BANK));
2967 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2969 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2970 NUM_BANKS(ADDR_SURF_8_BANK));
2973 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2974 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2975 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2976 NUM_BANKS(ADDR_SURF_8_BANK));
2979 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2980 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2981 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2982 NUM_BANKS(ADDR_SURF_8_BANK));
2985 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2986 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2987 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2988 NUM_BANKS(ADDR_SURF_8_BANK));
2991 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2992 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2993 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994 NUM_BANKS(ADDR_SURF_16_BANK));
2997 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2998 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2999 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3000 NUM_BANKS(ADDR_SURF_16_BANK));
3003 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3004 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3005 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006 NUM_BANKS(ADDR_SURF_16_BANK));
3009 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3010 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3011 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3012 NUM_BANKS(ADDR_SURF_16_BANK));
3015 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018 NUM_BANKS(ADDR_SURF_16_BANK));
3021 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3023 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3024 NUM_BANKS(ADDR_SURF_16_BANK));
3027 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3029 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3030 NUM_BANKS(ADDR_SURF_8_BANK));
3039 adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
3040 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
/*
 * Build a bitmask with the low @bit_width bits set, used to mask the
 * per-SH render-backend disable bits read from the config registers.
 * (Body partially elided in this view: the mask accumulation and return.)
 */
3045 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3049 	for (i = 0; i < bit_width; i++) {
/*
 * Program GRBM_GFX_INDEX so that subsequent register accesses are steered
 * to one shader engine (SE) / shader array (SH) pair, or broadcast when a
 * selector is 0xffffffff.  Callers serialize with adev->grbm_idx_mutex.
 */
3056 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
3058 	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	/* both wildcards: broadcast to every SE and SH */
3060 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3061 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3062 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	/* one SH across all SEs */
3063 	} else if (se_num == 0xffffffff) {
3064 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3065 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	/* all SHs of one SE */
3066 	} else if (sh_num == 0xffffffff) {
3067 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3068 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
	/* (else branch elided in this view) fully specific SE/SH pair */
3070 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3071 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3073 	WREG32(mmGRBM_GFX_INDEX, data);
/*
 * Return the bitmask of disabled render backends for the currently
 * selected SE/SH (see gfx_v8_0_select_se_sh).  Combines the fused-off
 * bits (CC_RB_BACKEND_DISABLE) with the user/driver-disabled bits
 * (GC_USER_RB_BACKEND_DISABLE), then masks to the RBs per SH.
 */
3076 static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
3077 				     u32 max_rb_num_per_se,
3082 	data = RREG32(mmCC_RB_BACKEND_DISABLE);
3083 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3085 	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
	/* both fields share the same shift, so one shift normalizes both */
3087 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3089 	mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);
/*
 * Discover which render backends (RBs) are usable on each SE/SH, record
 * the enabled-RB mask in adev->gfx.config, and program PA_SC_RASTER_CONFIG
 * per SE accordingly.  Takes grbm_idx_mutex around GRBM_GFX_INDEX steering.
 */
3094 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
3095 			      u32 se_num, u32 sh_per_se,
3096 			      u32 max_rb_num_per_se)
3100 	u32 disabled_rbs = 0;
3101 	u32 enabled_rbs = 0;
	/* pass 1: gather the disabled-RB bits from every SE/SH */
3103 	mutex_lock(&adev->grbm_idx_mutex);
3104 	for (i = 0; i < se_num; i++) {
3105 		for (j = 0; j < sh_per_se; j++) {
3106 			gfx_v8_0_select_se_sh(adev, i, j);
3107 			data = gfx_v8_0_get_rb_disabled(adev,
3108 					      max_rb_num_per_se, sh_per_se);
			/* pack each SH's bits into one global bitmap */
3109 			disabled_rbs |= data << ((i * sh_per_se + j) *
3110 						 RB_BITMAP_WIDTH_PER_SH);
3113 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3114 	mutex_unlock(&adev->grbm_idx_mutex);
	/* invert: an RB not marked disabled is enabled */
3117 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3118 		if (!(disabled_rbs & mask))
3119 			enabled_rbs |= mask;
3123 	adev->gfx.config.backend_enable_mask = enabled_rbs;
	/* pass 2: build each SE's raster config from its enabled-RB pattern */
3125 	mutex_lock(&adev->grbm_idx_mutex);
3126 	for (i = 0; i < se_num; i++) {
3127 		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
3129 		for (j = 0; j < sh_per_se; j++) {
			/* consume two enabled_rbs bits per SH */
3130 			switch (enabled_rbs & 3) {
3133 				data |= (RASTER_CONFIG_RB_MAP_3 <<
3134 					 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
3136 				data |= (RASTER_CONFIG_RB_MAP_0 <<
3137 					 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
3140 				data |= (RASTER_CONFIG_RB_MAP_0 <<
3141 					 (i * sh_per_se + j) * 2);
3144 				data |= (RASTER_CONFIG_RB_MAP_3 <<
3145 					 (i * sh_per_se + j) * 2);
			/* default: both RBs present */
3149 				data |= (RASTER_CONFIG_RB_MAP_2 <<
3150 					 (i * sh_per_se + j) * 2);
3155 		WREG32(mmPA_SC_RASTER_CONFIG, data);
3157 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3158 	mutex_unlock(&adev->grbm_idx_mutex);
3162  * gfx_v8_0_init_compute_vmid - init the compute VMIDs' SH_MEM registers
3164  * @adev: amdgpu_device pointer
3166  * Initialize the SH_MEM aperture registers for the compute VMIDs
3169 #define DEFAULT_SH_MEM_BASES (0x6000)
3170 #define FIRST_COMPUTE_VMID (8)
3171 #define LAST_COMPUTE_VMID (16)
3172 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3175 	uint32_t sh_mem_config;
3176 	uint32_t sh_mem_bases;
3179 	 * Configure apertures:
3180 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3181 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3182 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	/* same base in both the low and high halves of SH_MEM_BASES */
3184 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3186 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3187 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3188 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3189 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3190 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3191 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
	/* program VMIDs 8..15 (the compute range) via SRBM indexing */
3193 	mutex_lock(&adev->srbm_mutex);
3194 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3195 		vi_srbm_select(adev, 0, 0, 0, i);
3196 		/* CP and shaders */
3197 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base > limit disables the APE1 aperture */
3198 		WREG32(mmSH_MEM_APE1_BASE, 1);
3199 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3200 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	/* restore SRBM selection to VMID 0 */
3202 	vi_srbm_select(adev, 0, 0, 0, 0);
3203 	mutex_unlock(&adev->srbm_mutex);
/*
 * One-time GFX block configuration: GRBM timeout, address-config mirrors
 * for the other IP blocks, tiling tables, RB setup, per-VMID SH_MEM
 * apertures, and the PA_SC FIFO sizes.
 */
3206 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3211 	tmp = RREG32(mmGRBM_CNTL);
3212 	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3213 	WREG32(mmGRBM_CNTL, tmp);
	/* mirror gb_addr_config into the blocks that need the same layout */
3215 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3216 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3217 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3218 	WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
3219 	       adev->gfx.config.gb_addr_config & 0x70);
3220 	WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
3221 	       adev->gfx.config.gb_addr_config & 0x70);
3222 	WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3223 	WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3224 	WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3226 	gfx_v8_0_tiling_mode_table_init(adev);
3228 	gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
3229 			  adev->gfx.config.max_sh_per_se,
3230 			  adev->gfx.config.max_backends_per_se);
3232 	/* XXX SH_MEM regs */
3233 	/* where to put LDS, scratch, GPUVM in FSA64 space */
3234 	mutex_lock(&adev->srbm_mutex);
	/* program SH_MEM for all 16 VMIDs (compute VMIDs refined below) */
3235 	for (i = 0; i < 16; i++) {
3236 		vi_srbm_select(adev, 0, 0, 0, i);
3237 		/* CP and shaders */
		/* (branch selecting between the two configs elided in this view) */
3239 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3240 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3241 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3242 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3243 			WREG32(mmSH_MEM_CONFIG, tmp);
3245 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3246 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3247 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3248 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3249 			WREG32(mmSH_MEM_CONFIG, tmp);
		/* APE1 base > limit disables the APE1 aperture */
3252 		WREG32(mmSH_MEM_APE1_BASE, 1);
3253 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3254 		WREG32(mmSH_MEM_BASES, 0);
3256 	vi_srbm_select(adev, 0, 0, 0, 0);
3257 	mutex_unlock(&adev->srbm_mutex);
3259 	gfx_v8_0_init_compute_vmid(adev);
3261 	mutex_lock(&adev->grbm_idx_mutex);
3263 	 * making sure that the following register writes will be broadcasted
3264 	 * to all the shaders
3266 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3268 	WREG32(mmPA_SC_FIFO_SIZE,
3269 	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
3270 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3271 	       (adev->gfx.config.sc_prim_fifo_size_backend <<
3272 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3273 	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
3274 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3275 	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3276 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3277 	mutex_unlock(&adev->grbm_idx_mutex);
/*
 * Poll until the RLC serdes masters report idle: first the per-CU master
 * on every SE/SH, then the non-CU masters (SE/GC/TC0/TC1).  Each poll is
 * bounded by adev->usec_timeout.
 */
3281 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3286 	mutex_lock(&adev->grbm_idx_mutex);
3287 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3288 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3289 			gfx_v8_0_select_se_sh(adev, i, j);
3290 			for (k = 0; k < adev->usec_timeout; k++) {
3291 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3297 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3298 	mutex_unlock(&adev->grbm_idx_mutex);
	/* now wait for the global (non-CU) serdes masters */
3300 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3301 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3302 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3303 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3304 	for (k = 0; k < adev->usec_timeout; k++) {
3305 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/*
 * Enable or disable the ring-0 GUI idle/busy interrupt sources
 * (context busy/empty, CMP busy, GFX idle) in CP_INT_CNTL_RING0.
 */
3311 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3314 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
	/* enable path: set all four interrupt-enable fields */
3317 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 1);
3318 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 1);
3319 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 1);
3320 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 1);
	/* disable path: clear the same fields */
3322 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 0);
3323 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 0);
3324 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 0);
3325 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 0);
3327 	WREG32(mmCP_INT_CNTL_RING0, tmp);
/*
 * Halt the RLC: clear RLC_ENABLE_F32, mask the GUI idle interrupts,
 * then wait for the RLC serdes to drain.
 */
3330 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3332 	u32 tmp = RREG32(mmRLC_CNTL);
3334 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3335 	WREG32(mmRLC_CNTL, tmp);
3337 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3339 	gfx_v8_0_wait_for_rlc_serdes(adev);
/* Pulse the RLC soft-reset bit in GRBM_SOFT_RESET (assert, then deassert). */
3344 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3346 	u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3348 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3349 	WREG32(mmGRBM_SOFT_RESET, tmp);
3351 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3352 	WREG32(mmGRBM_SOFT_RESET, tmp);
/* Un-halt the RLC and, on dGPUs, re-enable the GUI idle interrupts. */
3356 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3358 	u32 tmp = RREG32(mmRLC_CNTL);
3360 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3361 	WREG32(mmRLC_CNTL, tmp);
3363 	/* carrizo do enable cp interrupt after cp inited */
3364 	if (!(adev->flags & AMD_IS_APU))
3365 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/*
 * Legacy (non-SMU) RLC microcode load: stream the firmware words into
 * RLC_GPM_UCODE_DATA, then leave the firmware version in the ADDR register.
 * Fails if the RLC firmware was never fetched.
 */
3368 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3370 	const struct rlc_firmware_header_v2_0 *hdr;
3371 	const __le32 *fw_data;
3372 	unsigned i, fw_size;
3374 	if (!adev->gfx.rlc_fw)
3377 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3378 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3380 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3381 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3382 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
	/* reset the write index, stream the image, then record the version */
3384 	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3385 	for (i = 0; i < fw_size; i++)
3386 		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3387 	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
/*
 * Full RLC bring-up: stop, clear CGCG/PG state, reset, load microcode
 * (directly, or via the SMU when smu_load is set), then start.
 */
3392 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3396 	gfx_v8_0_rlc_stop(adev);
	/* disable coarse-grain clock gating */
3399 	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
	/* disable power gating */
3402 	WREG32(mmRLC_PG_CNTL, 0);
3404 	gfx_v8_0_rlc_reset(adev);
3406 	if (!adev->firmware.smu_load) {
3407 		/* legacy rlc firmware loading */
3408 		r = gfx_v8_0_rlc_load_microcode(adev);
		/* SMU-managed load: just wait for the SMU to report completion */
3412 		r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3413 						AMDGPU_UCODE_ID_RLC_G);
3418 	gfx_v8_0_rlc_start(adev);
/*
 * Halt or un-halt the three gfx CP micro-engines (ME, PFP, CE) via
 * CP_ME_CNTL; when halting, also mark every gfx ring not ready.
 */
3423 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3426 	u32 tmp = RREG32(mmCP_ME_CNTL);
	/* enable: clear the halt bits */
3429 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
3430 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
3431 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	/* disable: set the halt bits and invalidate the rings */
3433 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
3434 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
3435 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
3436 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3437 		adev->gfx.gfx_ring[i].ready = false;
3439 	WREG32(mmCP_ME_CNTL, tmp);
/*
 * Legacy (non-SMU) load of the three gfx CP firmwares.  Halts the CP,
 * then streams PFP, CE and ME images into their ucode data registers,
 * finishing each by writing the firmware version to the address register.
 * Fails if any of the three firmware blobs is missing.
 */
3443 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3445 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3446 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3447 	const struct gfx_firmware_header_v1_0 *me_hdr;
3448 	const __le32 *fw_data;
3449 	unsigned i, fw_size;
3451 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3454 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3455 		adev->gfx.pfp_fw->data;
3456 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3457 		adev->gfx.ce_fw->data;
3458 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3459 		adev->gfx.me_fw->data;
3461 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3462 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3463 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
	/* CP must be halted while its ucode is replaced */
3465 	gfx_v8_0_cp_gfx_enable(adev, false);
	/* PFP */
3468 	fw_data = (const __le32 *)
3469 		(adev->gfx.pfp_fw->data +
3470 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3471 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3472 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
3473 	for (i = 0; i < fw_size; i++)
3474 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3475 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
	/* CE */
3478 	fw_data = (const __le32 *)
3479 		(adev->gfx.ce_fw->data +
3480 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3481 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3482 	WREG32(mmCP_CE_UCODE_ADDR, 0);
3483 	for (i = 0; i < fw_size; i++)
3484 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3485 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
	/* ME (uses the CP_ME_RAM interface rather than a UCODE pair) */
3488 	fw_data = (const __le32 *)
3489 		(adev->gfx.me_fw->data +
3490 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3491 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3492 	WREG32(mmCP_ME_RAM_WADDR, 0);
3493 	for (i = 0; i < fw_size; i++)
3494 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3495 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
/*
 * Compute the dword size of the clear-state buffer: fixed preamble and
 * context-control packets, plus 2 dwords of packet header per SECT_CONTEXT
 * extent in vi_cs_data followed by that extent's register payload, plus
 * the raster-config and end-of-clear-state packets.
 */
3500 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3503 	const struct cs_section_def *sect = NULL;
3504 	const struct cs_extent_def *ext = NULL;
3506 	/* begin clear state */
3508 	/* context control state */
3511 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3512 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3513 			if (sect->id == SECT_CONTEXT)
3514 				count += 2 + ext->reg_count;
3519 	/* pa_sc_raster_config/pa_sc_raster_config1 */
3521 	/* end clear state */
/*
 * Bring up the gfx ring: program basic CP state, un-halt the CP, then
 * emit the clear-state sequence (context control, vi_cs_data registers,
 * per-ASIC raster config, CLEAR_STATE) and the CE partition bases.
 */
3529 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3531 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3532 	const struct cs_section_def *sect = NULL;
3533 	const struct cs_extent_def *ext = NULL;
3537 	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3538 	WREG32(mmCP_ENDIAN_SWAP, 0);
3539 	WREG32(mmCP_DEVICE_ID, 1);
3541 	gfx_v8_0_cp_gfx_enable(adev, true);
	/* reserve csb size + 4 dwords of ring space */
3543 	r = amdgpu_ring_lock(ring, gfx_v8_0_get_csb_size(adev) + 4);
3545 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3549 	/* clear state buffer */
3550 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3551 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3553 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3554 	amdgpu_ring_write(ring, 0x80000000);
3555 	amdgpu_ring_write(ring, 0x80000000);
	/* emit every SECT_CONTEXT extent from the golden clear-state table */
3557 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3558 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3559 			if (sect->id == SECT_CONTEXT) {
3560 				amdgpu_ring_write(ring,
3561 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3563 				amdgpu_ring_write(ring,
3564 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3565 				for (i = 0; i < ext->reg_count; i++)
3566 					amdgpu_ring_write(ring, ext->extent[i]);
	/* per-ASIC PA_SC_RASTER_CONFIG / _CONFIG1 values */
3571 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3572 	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3573 	switch (adev->asic_type) {
3575 		amdgpu_ring_write(ring, 0x16000012);
3576 		amdgpu_ring_write(ring, 0x0000002A);
3579 		amdgpu_ring_write(ring, 0x3a00161a);
3580 		amdgpu_ring_write(ring, 0x0000002e);
3584 		amdgpu_ring_write(ring, 0x00000002);
3585 		amdgpu_ring_write(ring, 0x00000000);
3588 		amdgpu_ring_write(ring, 0x00000000);
3589 		amdgpu_ring_write(ring, 0x00000000);
3595 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3596 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3598 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3599 	amdgpu_ring_write(ring, 0);
3601 	/* init the CE partitions */
3602 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3603 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3604 	amdgpu_ring_write(ring, 0x8000);
3605 	amdgpu_ring_write(ring, 0x8000);
3607 	amdgpu_ring_unlock_commit(ring);
/*
 * Program the gfx ring buffer hardware (size, pointers, writeback
 * address, base, doorbell), then start the ring and run a ring test.
 */
3612 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
3614 	struct amdgpu_ring *ring;
3617 	u64 rb_addr, rptr_addr;
3620 	/* Set the write pointer delay */
3621 	WREG32(mmCP_RB_WPTR_DELAY, 0);
3623 	/* set the RB to use vmid 0 */
3624 	WREG32(mmCP_RB_VMID, 0);
3626 	/* Set ring buffer size */
3627 	ring = &adev->gfx.gfx_ring[0];
	/* ring_size is in bytes; RB_BUFSZ encodes log2 of dword count */
3628 	rb_bufsz = order_base_2(ring->ring_size / 8);
3629 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3630 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3631 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
3632 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
	/* big-endian build only (guard elided in this view) */
3634 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3636 	WREG32(mmCP_RB0_CNTL, tmp);
3638 	/* Initialize the ring buffer's read and write pointers */
3639 	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
3641 	WREG32(mmCP_RB0_WPTR, ring->wptr);
3643 	/* set the wb address whether it's enabled or not */
3644 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3645 	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3646 	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
	/* drop RPTR_WR_ENA again after the pointers are initialized */
3649 	WREG32(mmCP_RB0_CNTL, tmp);
	/* ring base is programmed in 256-byte units */
3651 	rb_addr = ring->gpu_addr >> 8;
3652 	WREG32(mmCP_RB0_BASE, rb_addr);
3653 	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3655 	/* no gfx doorbells on iceland */
3656 	if (adev->asic_type != CHIP_TOPAZ) {
3657 		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
3658 		if (ring->use_doorbell) {
3659 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3660 					    DOORBELL_OFFSET, ring->doorbell_index);
3661 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
			/* else: disable the doorbell (field value elided in this view) */
3664 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3667 		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
		/* Tonga additionally restricts the doorbell address range */
3669 		if (adev->asic_type == CHIP_TONGA) {
3670 			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3671 					    DOORBELL_RANGE_LOWER,
3672 					    AMDGPU_DOORBELL_GFX_RING0);
3673 			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3675 			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
3676 			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3681 	/* start the ring */
3682 	gfx_v8_0_cp_gfx_start(adev);
3684 	r = amdgpu_ring_test_ring(ring);
3686 		ring->ready = false;
/*
 * Halt or un-halt both MEC micro-engines via CP_MEC_CNTL; when halting,
 * also mark every compute ring not ready.
 */
3693 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3698 	WREG32(mmCP_MEC_CNTL, 0);
3700 	WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3701 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
3702 		adev->gfx.compute_ring[i].ready = false;
/* Un-halt the compute MECs; the queues themselves are set up elsewhere. */
3707 static int gfx_v8_0_cp_compute_start(struct amdgpu_device *adev)
3709 	gfx_v8_0_cp_compute_enable(adev, true);
/*
 * Legacy (non-SMU) load of the compute MEC firmware(s).  Halts the MECs,
 * streams the MEC1 image, and loads MEC2 only when a separate MEC2 blob
 * was fetched.  Fails if the MEC1 firmware is missing.
 */
3714 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3716 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3717 	const __le32 *fw_data;
3718 	unsigned i, fw_size;
3720 	if (!adev->gfx.mec_fw)
	/* MECs must be halted while their ucode is replaced */
3723 	gfx_v8_0_cp_compute_enable(adev, false);
3725 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3726 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3728 	fw_data = (const __le32 *)
3729 		(adev->gfx.mec_fw->data +
3730 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3731 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
	/* MEC1 */
3734 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3735 	for (i = 0; i < fw_size; i++)
3736 		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3737 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3739 	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3740 	if (adev->gfx.mec2_fw) {
3741 		const struct gfx_firmware_header_v1_0 *mec2_hdr;
3743 		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3744 		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3746 		fw_data = (const __le32 *)
3747 			(adev->gfx.mec2_fw->data +
3748 			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3749 		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3751 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3752 		for (i = 0; i < fw_size; i++)
3753 			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3754 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
3761 uint32_t header; /* ordinal0 */
3762 uint32_t compute_dispatch_initiator; /* ordinal1 */
3763 uint32_t compute_dim_x; /* ordinal2 */
3764 uint32_t compute_dim_y; /* ordinal3 */
3765 uint32_t compute_dim_z; /* ordinal4 */
3766 uint32_t compute_start_x; /* ordinal5 */
3767 uint32_t compute_start_y; /* ordinal6 */
3768 uint32_t compute_start_z; /* ordinal7 */
3769 uint32_t compute_num_thread_x; /* ordinal8 */
3770 uint32_t compute_num_thread_y; /* ordinal9 */
3771 uint32_t compute_num_thread_z; /* ordinal10 */
3772 uint32_t compute_pipelinestat_enable; /* ordinal11 */
3773 uint32_t compute_perfcount_enable; /* ordinal12 */
3774 uint32_t compute_pgm_lo; /* ordinal13 */
3775 uint32_t compute_pgm_hi; /* ordinal14 */
3776 uint32_t compute_tba_lo; /* ordinal15 */
3777 uint32_t compute_tba_hi; /* ordinal16 */
3778 uint32_t compute_tma_lo; /* ordinal17 */
3779 uint32_t compute_tma_hi; /* ordinal18 */
3780 uint32_t compute_pgm_rsrc1; /* ordinal19 */
3781 uint32_t compute_pgm_rsrc2; /* ordinal20 */
3782 uint32_t compute_vmid; /* ordinal21 */
3783 uint32_t compute_resource_limits; /* ordinal22 */
3784 uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */
3785 uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */
3786 uint32_t compute_tmpring_size; /* ordinal25 */
3787 uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */
3788 uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */
3789 uint32_t compute_restart_x; /* ordinal28 */
3790 uint32_t compute_restart_y; /* ordinal29 */
3791 uint32_t compute_restart_z; /* ordinal30 */
3792 uint32_t compute_thread_trace_enable; /* ordinal31 */
3793 uint32_t compute_misc_reserved; /* ordinal32 */
3794 uint32_t compute_dispatch_id; /* ordinal33 */
3795 uint32_t compute_threadgroup_id; /* ordinal34 */
3796 uint32_t compute_relaunch; /* ordinal35 */
3797 uint32_t compute_wave_restore_addr_lo; /* ordinal36 */
3798 uint32_t compute_wave_restore_addr_hi; /* ordinal37 */
3799 uint32_t compute_wave_restore_control; /* ordinal38 */
3800 uint32_t reserved9; /* ordinal39 */
3801 uint32_t reserved10; /* ordinal40 */
3802 uint32_t reserved11; /* ordinal41 */
3803 uint32_t reserved12; /* ordinal42 */
3804 uint32_t reserved13; /* ordinal43 */
3805 uint32_t reserved14; /* ordinal44 */
3806 uint32_t reserved15; /* ordinal45 */
3807 uint32_t reserved16; /* ordinal46 */
3808 uint32_t reserved17; /* ordinal47 */
3809 uint32_t reserved18; /* ordinal48 */
3810 uint32_t reserved19; /* ordinal49 */
3811 uint32_t reserved20; /* ordinal50 */
3812 uint32_t reserved21; /* ordinal51 */
3813 uint32_t reserved22; /* ordinal52 */
3814 uint32_t reserved23; /* ordinal53 */
3815 uint32_t reserved24; /* ordinal54 */
3816 uint32_t reserved25; /* ordinal55 */
3817 uint32_t reserved26; /* ordinal56 */
3818 uint32_t reserved27; /* ordinal57 */
3819 uint32_t reserved28; /* ordinal58 */
3820 uint32_t reserved29; /* ordinal59 */
3821 uint32_t reserved30; /* ordinal60 */
3822 uint32_t reserved31; /* ordinal61 */
3823 uint32_t reserved32; /* ordinal62 */
3824 uint32_t reserved33; /* ordinal63 */
3825 uint32_t reserved34; /* ordinal64 */
3826 uint32_t compute_user_data_0; /* ordinal65 */
3827 uint32_t compute_user_data_1; /* ordinal66 */
3828 uint32_t compute_user_data_2; /* ordinal67 */
3829 uint32_t compute_user_data_3; /* ordinal68 */
3830 uint32_t compute_user_data_4; /* ordinal69 */
3831 uint32_t compute_user_data_5; /* ordinal70 */
3832 uint32_t compute_user_data_6; /* ordinal71 */
3833 uint32_t compute_user_data_7; /* ordinal72 */
3834 uint32_t compute_user_data_8; /* ordinal73 */
3835 uint32_t compute_user_data_9; /* ordinal74 */
3836 uint32_t compute_user_data_10; /* ordinal75 */
3837 uint32_t compute_user_data_11; /* ordinal76 */
3838 uint32_t compute_user_data_12; /* ordinal77 */
3839 uint32_t compute_user_data_13; /* ordinal78 */
3840 uint32_t compute_user_data_14; /* ordinal79 */
3841 uint32_t compute_user_data_15; /* ordinal80 */
3842 uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */
3843 uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */
3844 uint32_t reserved35; /* ordinal83 */
3845 uint32_t reserved36; /* ordinal84 */
3846 uint32_t reserved37; /* ordinal85 */
3847 uint32_t cp_mqd_query_time_lo; /* ordinal86 */
3848 uint32_t cp_mqd_query_time_hi; /* ordinal87 */
3849 uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */
3850 uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */
3851 uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */
3852 uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */
3853 uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */
3854 uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */
3855 uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */
3856 uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */
3857 uint32_t reserved38; /* ordinal96 */
3858 uint32_t reserved39; /* ordinal97 */
3859 uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */
3860 uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */
3861 uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */
3862 uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */
3863 uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */
3864 uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */
3865 uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */
3866 uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */
3867 uint32_t reserved40; /* ordinal106 */
3868 uint32_t reserved41; /* ordinal107 */
3869 uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */
3870 uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */
3871 uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */
3872 uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */
3873 uint32_t reserved42; /* ordinal112 */
3874 uint32_t reserved43; /* ordinal113 */
3875 uint32_t cp_pq_exe_status_lo; /* ordinal114 */
3876 uint32_t cp_pq_exe_status_hi; /* ordinal115 */
3877 uint32_t cp_packet_id_lo; /* ordinal116 */
3878 uint32_t cp_packet_id_hi; /* ordinal117 */
3879 uint32_t cp_packet_exe_status_lo; /* ordinal118 */
3880 uint32_t cp_packet_exe_status_hi; /* ordinal119 */
3881 uint32_t gds_save_base_addr_lo; /* ordinal120 */
3882 uint32_t gds_save_base_addr_hi; /* ordinal121 */
3883 uint32_t gds_save_mask_lo; /* ordinal122 */
3884 uint32_t gds_save_mask_hi; /* ordinal123 */
3885 uint32_t ctx_save_base_addr_lo; /* ordinal124 */
3886 uint32_t ctx_save_base_addr_hi; /* ordinal125 */
3887 uint32_t reserved44; /* ordinal126 */
3888 uint32_t reserved45; /* ordinal127 */
3889 uint32_t cp_mqd_base_addr_lo; /* ordinal128 */
3890 uint32_t cp_mqd_base_addr_hi; /* ordinal129 */
3891 uint32_t cp_hqd_active; /* ordinal130 */
3892 uint32_t cp_hqd_vmid; /* ordinal131 */
3893 uint32_t cp_hqd_persistent_state; /* ordinal132 */
3894 uint32_t cp_hqd_pipe_priority; /* ordinal133 */
3895 uint32_t cp_hqd_queue_priority; /* ordinal134 */
3896 uint32_t cp_hqd_quantum; /* ordinal135 */
3897 uint32_t cp_hqd_pq_base_lo; /* ordinal136 */
3898 uint32_t cp_hqd_pq_base_hi; /* ordinal137 */
3899 uint32_t cp_hqd_pq_rptr; /* ordinal138 */
3900 uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */
3901 uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */
3902 uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */
3903 uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */
3904 uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */
3905 uint32_t cp_hqd_pq_wptr; /* ordinal144 */
3906 uint32_t cp_hqd_pq_control; /* ordinal145 */
3907 uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */
3908 uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */
3909 uint32_t cp_hqd_ib_rptr; /* ordinal148 */
3910 uint32_t cp_hqd_ib_control; /* ordinal149 */
3911 uint32_t cp_hqd_iq_timer; /* ordinal150 */
3912 uint32_t cp_hqd_iq_rptr; /* ordinal151 */
3913 uint32_t cp_hqd_dequeue_request; /* ordinal152 */
3914 uint32_t cp_hqd_dma_offload; /* ordinal153 */
3915 uint32_t cp_hqd_sema_cmd; /* ordinal154 */
3916 uint32_t cp_hqd_msg_type; /* ordinal155 */
3917 uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */
3918 uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */
3919 uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */
3920 uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */
3921 uint32_t cp_hqd_hq_status0; /* ordinal160 */
3922 uint32_t cp_hqd_hq_control0; /* ordinal161 */
3923 uint32_t cp_mqd_control; /* ordinal162 */
3924 uint32_t cp_hqd_hq_status1; /* ordinal163 */
3925 uint32_t cp_hqd_hq_control1; /* ordinal164 */
3926 uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */
3927 uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */
3928 uint32_t cp_hqd_eop_control; /* ordinal167 */
3929 uint32_t cp_hqd_eop_rptr; /* ordinal168 */
3930 uint32_t cp_hqd_eop_wptr; /* ordinal169 */
3931 uint32_t cp_hqd_eop_done_events; /* ordinal170 */
3932 uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */
3933 uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */
3934 uint32_t cp_hqd_ctx_save_control; /* ordinal173 */
3935 uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */
3936 uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */
3937 uint32_t cp_hqd_wg_state_offset; /* ordinal176 */
3938 uint32_t cp_hqd_ctx_save_size; /* ordinal177 */
3939 uint32_t cp_hqd_gds_resource_state; /* ordinal178 */
3940 uint32_t cp_hqd_error; /* ordinal179 */
3941 uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */
3942 uint32_t cp_hqd_eop_dones; /* ordinal181 */
3943 uint32_t reserved46; /* ordinal182 */
3944 uint32_t reserved47; /* ordinal183 */
3945 uint32_t reserved48; /* ordinal184 */
3946 uint32_t reserved49; /* ordinal185 */
3947 uint32_t reserved50; /* ordinal186 */
3948 uint32_t reserved51; /* ordinal187 */
3949 uint32_t reserved52; /* ordinal188 */
3950 uint32_t reserved53; /* ordinal189 */
3951 uint32_t reserved54; /* ordinal190 */
3952 uint32_t reserved55; /* ordinal191 */
3953 uint32_t iqtimer_pkt_header; /* ordinal192 */
3954 uint32_t iqtimer_pkt_dw0; /* ordinal193 */
3955 uint32_t iqtimer_pkt_dw1; /* ordinal194 */
3956 uint32_t iqtimer_pkt_dw2; /* ordinal195 */
3957 uint32_t iqtimer_pkt_dw3; /* ordinal196 */
3958 uint32_t iqtimer_pkt_dw4; /* ordinal197 */
3959 uint32_t iqtimer_pkt_dw5; /* ordinal198 */
3960 uint32_t iqtimer_pkt_dw6; /* ordinal199 */
3961 uint32_t iqtimer_pkt_dw7; /* ordinal200 */
3962 uint32_t iqtimer_pkt_dw8; /* ordinal201 */
3963 uint32_t iqtimer_pkt_dw9; /* ordinal202 */
3964 uint32_t iqtimer_pkt_dw10; /* ordinal203 */
3965 uint32_t iqtimer_pkt_dw11; /* ordinal204 */
3966 uint32_t iqtimer_pkt_dw12; /* ordinal205 */
3967 uint32_t iqtimer_pkt_dw13; /* ordinal206 */
3968 uint32_t iqtimer_pkt_dw14; /* ordinal207 */
3969 uint32_t iqtimer_pkt_dw15; /* ordinal208 */
3970 uint32_t iqtimer_pkt_dw16; /* ordinal209 */
3971 uint32_t iqtimer_pkt_dw17; /* ordinal210 */
3972 uint32_t iqtimer_pkt_dw18; /* ordinal211 */
3973 uint32_t iqtimer_pkt_dw19; /* ordinal212 */
3974 uint32_t iqtimer_pkt_dw20; /* ordinal213 */
3975 uint32_t iqtimer_pkt_dw21; /* ordinal214 */
3976 uint32_t iqtimer_pkt_dw22; /* ordinal215 */
3977 uint32_t iqtimer_pkt_dw23; /* ordinal216 */
3978 uint32_t iqtimer_pkt_dw24; /* ordinal217 */
3979 uint32_t iqtimer_pkt_dw25; /* ordinal218 */
3980 uint32_t iqtimer_pkt_dw26; /* ordinal219 */
3981 uint32_t iqtimer_pkt_dw27; /* ordinal220 */
3982 uint32_t iqtimer_pkt_dw28; /* ordinal221 */
3983 uint32_t iqtimer_pkt_dw29; /* ordinal222 */
3984 uint32_t iqtimer_pkt_dw30; /* ordinal223 */
3985 uint32_t iqtimer_pkt_dw31; /* ordinal224 */
3986 uint32_t reserved56; /* ordinal225 */
3987 uint32_t reserved57; /* ordinal226 */
3988 uint32_t reserved58; /* ordinal227 */
3989 uint32_t set_resources_header; /* ordinal228 */
3990 uint32_t set_resources_dw1; /* ordinal229 */
3991 uint32_t set_resources_dw2; /* ordinal230 */
3992 uint32_t set_resources_dw3; /* ordinal231 */
3993 uint32_t set_resources_dw4; /* ordinal232 */
3994 uint32_t set_resources_dw5; /* ordinal233 */
3995 uint32_t set_resources_dw6; /* ordinal234 */
3996 uint32_t set_resources_dw7; /* ordinal235 */
3997 uint32_t reserved59; /* ordinal236 */
3998 uint32_t reserved60; /* ordinal237 */
3999 uint32_t reserved61; /* ordinal238 */
4000 uint32_t reserved62; /* ordinal239 */
4001 uint32_t reserved63; /* ordinal240 */
4002 uint32_t reserved64; /* ordinal241 */
4003 uint32_t reserved65; /* ordinal242 */
4004 uint32_t reserved66; /* ordinal243 */
4005 uint32_t reserved67; /* ordinal244 */
4006 uint32_t reserved68; /* ordinal245 */
4007 uint32_t reserved69; /* ordinal246 */
4008 uint32_t reserved70; /* ordinal247 */
4009 uint32_t reserved71; /* ordinal248 */
4010 uint32_t reserved72; /* ordinal249 */
4011 uint32_t reserved73; /* ordinal250 */
4012 uint32_t reserved74; /* ordinal251 */
4013 uint32_t reserved75; /* ordinal252 */
4014 uint32_t reserved76; /* ordinal253 */
4015 uint32_t reserved77; /* ordinal254 */
4016 uint32_t reserved78; /* ordinal255 */
4018 uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
/*
 * gfx_v8_0_cp_compute_fini - tear down per-ring compute MQD buffers
 *
 * @adev: amdgpu device pointer
 *
 * For every compute ring, unpin, unreserve and drop the reference on the
 * MQD (memory queue descriptor) buffer object that was created by
 * gfx_v8_0_cp_compute_resume().  Safe to call on rings whose MQD was
 * never allocated (mqd_obj is NULL-checked).
 */
static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj) {
			r = amdgpu_bo_reserve(ring->mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);

			/* NOTE(review): unpin/unreserve run even when the
			 * reserve above failed — confirm this is acceptable
			 * under the amdgpu BO locking rules.
			 */
			amdgpu_bo_unpin(ring->mqd_obj);
			amdgpu_bo_unreserve(ring->mqd_obj);

			/* drop the last reference; clear the pointer so a
			 * later resume re-allocates the MQD */
			amdgpu_bo_unref(&ring->mqd_obj);
			ring->mqd_obj = NULL;
		}
	}
}
/*
 * gfx_v8_0_cp_compute_resume - bring up the compute queues (MEC HQDs)
 *
 * @adev: amdgpu device pointer
 *
 * Programs the EOP buffers for every MEC pipe, then, for each compute
 * ring, allocates/pins/maps an MQD buffer object, fills in the MQD and
 * mirrors its fields into the HQD registers (under SRBM select for the
 * ring's me/pipe/queue), activates the queue, starts the MEC and runs a
 * ring test on each compute ring.
 *
 * Returns 0 on success, negative error code on allocation or start
 * failure.  On MQD setup failure the already-created MQDs are torn down
 * via gfx_v8_0_cp_compute_fini().
 *
 * NOTE: the register programming order below (disable wptr polling,
 * quiesce an active queue, base/control/doorbell setup, then HQD_ACTIVE
 * last) is mandated by the CP microcode — do not reorder.
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 live on ME 1, pipes 4-7 on ME 2 */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		/* PM4 MAP_QUEUES-style header expected by the CP firmware */
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all CUs on all shader engines for this queue */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* record the EOP base programmed for this pipe above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		else
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);

		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* busy-wait for the HQD to drain before reprogramming */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		/* queue size register value is log2(size in dwords) - 1 */
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address wether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	r = gfx_v8_0_cp_compute_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
/*
 * gfx_v8_0_cp_resume - load CP microcode and start gfx + compute rings
 *
 * @adev: amdgpu device pointer
 *
 * Microcode is either loaded directly by the driver (legacy path) or has
 * already been loaded by the SMU, in which case we only poll for load
 * completion of each CP firmware (CE/PFP/ME/MEC1).  On success the gfx
 * and compute rings are resumed and the GUI idle interrupt re-enabled.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	/* APUs keep the GUI idle interrupt enabled across resume */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->firmware.smu_load) {
		/* legacy firmware loading */
		r = gfx_v8_0_cp_gfx_load_microcode(adev);
		if (r)
			return r;

		r = gfx_v8_0_cp_compute_load_microcode(adev);
		if (r)
			return r;
	} else {
		/* SMU-managed loading: just verify each CP fw finished */
		r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
						AMDGPU_UCODE_ID_CP_CE);
		if (r)
			return -EINVAL;

		r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
						AMDGPU_UCODE_ID_CP_PFP);
		if (r)
			return -EINVAL;

		r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
						AMDGPU_UCODE_ID_CP_ME);
		if (r)
			return -EINVAL;

		r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
						AMDGPU_UCODE_ID_CP_MEC1);
		if (r)
			return -EINVAL;
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_compute_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
/*
 * gfx_v8_0_cp_enable - enable/disable both CP engines
 *
 * @adev: amdgpu device pointer
 * @enable: true to enable, false to halt
 *
 * Toggles the gfx micro engine and the compute MEC together.
 */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
/*
 * gfx_v8_0_hw_init - IP-block hw_init callback for GFX v8
 *
 * @handle: amdgpu device pointer (as void * per the IP funcs interface)
 *
 * Applies golden register settings, programs the GPU configuration,
 * then brings up the RLC followed by the CP.  Returns 0 on success or
 * the error from RLC/CP resume.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);

	gfx_v8_0_gpu_init(adev);

	/* RLC must be running before the CP is started */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);
	if (r)
		return r;

	return r;
}
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini callback for GFX v8
 *
 * @handle: amdgpu device pointer (as void *)
 *
 * Halts the CP engines, stops the RLC, then frees the compute MQD
 * buffers.  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* stop command processors before stopping the RLC */
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	return 0;
}
/*
 * gfx_v8_0_suspend - IP-block suspend callback
 *
 * @handle: amdgpu device pointer (as void *)
 *
 * Suspend is a full hardware teardown for this block; simply delegate
 * to gfx_v8_0_hw_fini(), which takes the same opaque handle.
 */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
/*
 * gfx_v8_0_resume - IP-block resume callback
 *
 * @handle: amdgpu device pointer (as void *)
 *
 * Resume re-runs the full hardware init; delegate to
 * gfx_v8_0_hw_init(), which takes the same opaque handle.
 */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4396 static bool gfx_v8_0_is_idle(void *handle)
4398 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4400 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
/*
 * gfx_v8_0_wait_for_idle - poll until the GFX block goes idle
 *
 * @handle: amdgpu device pointer (as void *)
 *
 * Polls GRBM_STATUS.GUI_ACTIVE once per microsecond for up to
 * adev->usec_timeout iterations.  Returns 0 when idle, -ETIMEDOUT
 * if the block never went idle.
 */
static int gfx_v8_0_wait_for_idle(void *handle)
{
	unsigned i;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		/* read MC_STATUS */
		/* NOTE(review): the comment above looks copied from an MC
		 * helper — this actually reads GRBM_STATUS. */
		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;

		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}
/*
 * gfx_v8_0_print_status - dump GFX/CP/RLC register state to the log
 *
 * @handle: amdgpu device pointer (as void *)
 *
 * Debug aid (invoked around soft reset) that logs a snapshot of status,
 * tiling, CP ring and RLC registers, plus the per-VMID SH_MEM_* state
 * under srbm_mutex.  Read-only apart from the SE/SH and SRBM selects,
 * which are restored to broadcast/VMID 0 before returning.
 */
static void gfx_v8_0_print_status(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	dev_info(adev->dev, "GFX 8.x registers\n");
	dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(mmGRBM_STATUS));
	dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(mmGRBM_STATUS2));
	dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(mmGRBM_STATUS_SE0));
	dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(mmGRBM_STATUS_SE1));
	dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(mmGRBM_STATUS_SE2));
	dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(mmGRBM_STATUS_SE3));
	dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
	dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT1));
	dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT2));
	dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT3));
	dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(mmCP_CPF_BUSY_STAT));
	dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_CPF_STALLED_STAT1));
	dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
	dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
	dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_CPC_STALLED_STAT1));
	dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));

	for (i = 0; i < 32; i++) {
		dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
			 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
	}
	for (i = 0; i < 16; i++) {
		dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
			 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
	}
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		dev_info(adev->dev, "  se: %d\n", i);
		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
			 RREG32(mmPA_SC_RASTER_CONFIG));
		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
			 RREG32(mmPA_SC_RASTER_CONFIG_1));
	}
	/* restore broadcast to all SEs/SHs after the per-SE dump above */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmGB_ADDR_CONFIG));
	dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmHDP_ADDR_CONFIG));
	dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
		 RREG32(mmDMIF_ADDR_CALC));
	dev_info(adev->dev, "  SDMA0_TILING_CONFIG=0x%08X\n",
		 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
	dev_info(adev->dev, "  SDMA1_TILING_CONFIG=0x%08X\n",
		 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
	dev_info(adev->dev, "  UVD_UDEC_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_ADDR_CONFIG));
	dev_info(adev->dev, "  UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
	dev_info(adev->dev, "  UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));

	dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
		 RREG32(mmCP_MEQ_THRESHOLDS));
	dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
		 RREG32(mmSX_DEBUG_1));
	dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
		 RREG32(mmTA_CNTL_AUX));
	dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
		 RREG32(mmSPI_CONFIG_CNTL));
	dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
		 RREG32(mmSQ_CONFIG));
	dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
		 RREG32(mmDB_DEBUG));
	dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
		 RREG32(mmDB_DEBUG2));
	dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
		 RREG32(mmDB_DEBUG3));
	dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
		 RREG32(mmCB_HW_CONTROL));
	dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
		 RREG32(mmSPI_CONFIG_CNTL_1));
	dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
		 RREG32(mmPA_SC_FIFO_SIZE));
	dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
		 RREG32(mmVGT_NUM_INSTANCES));
	dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
		 RREG32(mmCP_PERFMON_CNTL));
	dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
		 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
	dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
		 RREG32(mmVGT_CACHE_INVALIDATION));
	dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
		 RREG32(mmVGT_GS_VERTEX_REUSE));
	dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
		 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
	dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
		 RREG32(mmPA_CL_ENHANCE));
	dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
		 RREG32(mmPA_SC_ENHANCE));

	dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
		 RREG32(mmCP_ME_CNTL));
	dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
		 RREG32(mmCP_MAX_CONTEXT));
	dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
		 RREG32(mmCP_ENDIAN_SWAP));
	dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
		 RREG32(mmCP_DEVICE_ID));

	dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
		 RREG32(mmCP_SEM_WAIT_TIMER));

	dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
		 RREG32(mmCP_RB_WPTR_DELAY));
	dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
		 RREG32(mmCP_RB_VMID));
	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
		 RREG32(mmCP_RB0_CNTL));
	dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
		 RREG32(mmCP_RB0_WPTR));
	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
		 RREG32(mmCP_RB0_RPTR_ADDR));
	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
		 RREG32(mmCP_RB0_RPTR_ADDR_HI));
	/* NOTE(review): CP_RB0_CNTL is intentionally dumped a second time
	 * here (same as the print above) — harmless but redundant. */
	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
		 RREG32(mmCP_RB0_CNTL));
	dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
		 RREG32(mmCP_RB0_BASE));
	dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
		 RREG32(mmCP_RB0_BASE_HI));
	dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
		 RREG32(mmCP_MEC_CNTL));
	dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
		 RREG32(mmCP_CPF_DEBUG));

	dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
		 RREG32(mmSCRATCH_ADDR));
	dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
		 RREG32(mmSCRATCH_UMSK));

	dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
		 RREG32(mmCP_INT_CNTL_RING0));
	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
		 RREG32(mmRLC_LB_CNTL));
	dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
		 RREG32(mmRLC_CNTL));
	dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
		 RREG32(mmRLC_CGCG_CGLS_CTRL));
	dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
		 RREG32(mmRLC_LB_CNTR_INIT));
	dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
		 RREG32(mmRLC_LB_CNTR_MAX));
	dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
		 RREG32(mmRLC_LB_INIT_CU_MASK));
	dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
		 RREG32(mmRLC_LB_PARAMS));
	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
		 RREG32(mmRLC_LB_CNTL));
	dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
		 RREG32(mmRLC_MC_CNTL));
	dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
		 RREG32(mmRLC_UCODE_CNTL));

	/* per-VMID aperture state; SRBM select serialised by srbm_mutex */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		dev_info(adev->dev, "  VM %d:\n", i);
		dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
			 RREG32(mmSH_MEM_CONFIG));
		dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
			 RREG32(mmSH_MEM_APE1_BASE));
		dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
			 RREG32(mmSH_MEM_APE1_LIMIT));
		dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
			 RREG32(mmSH_MEM_BASES));
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
/*
 * gfx_v8_0_soft_reset - soft-reset hung GFX sub-blocks
 *
 * @handle: amdgpu device pointer (as void *)
 *
 * Inspects GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS to decide which reset
 * bits (CP, GFX, RLC, GRBM) to assert.  If anything is busy, the RLC
 * and both CP engines are halted, the selected reset bits are pulsed
 * (write, read back to flush, settle delay, clear), and the register
 * state is dumped before and after for debugging.  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		gfx_v8_0_print_status((void *)adev);
		/* stop the rlc */
		gfx_v8_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);

		if (grbm_soft_reset) {
			/* assert, flush with a readback, wait, de-assert */
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		if (srbm_soft_reset) {
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}
		/* Wait a little for things to settle down */
		udelay(50);
		gfx_v8_0_print_status((void *)adev);
	}
	return 0;
}
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	/* gpu_clock_mutex serialises the capture trigger with the two
	 * dependent LSB/MSB reads below */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
/*
 * gfx_v8_0_ring_emit_gds_switch - emit GDS/GWS/OA partition updates
 *
 * @ring: ring to emit on
 * @vmid: VMID whose GDS registers are updated
 * @gds_base/@gds_size: GDS partition (byte units, converted below)
 * @gws_base/@gws_size: GWS partition
 * @oa_base/@oa_size: ordered-append partition
 *
 * Writes the per-VMID GDS base/size, GWS and OA mask registers via
 * WRITE_DATA packets (ME engine, register destination).  Each packet is
 * a fixed 5 dwords: header, control, reg offset, 0, value.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* convert byte quantities to the hardware's allocation granules */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
/*
 * gfx_v8_0_early_init - IP-block early_init callback
 *
 * @handle: amdgpu device pointer (as void *)
 *
 * Sets the ring counts and installs the ring/irq/GDS function tables
 * before any hardware touch.  Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);

	return 0;
}
/*
 * gfx_v8_0_late_init - IP-block late_init callback
 *
 * @handle: amdgpu device pointer (as void *)
 *
 * Runs the EDC GPR workarounds, which submit IBs and therefore must
 * wait until the IB pool exists.  Returns the workaround result.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	return 0;
}
/*
 * gfx_v8_0_set_powergating_state - IP-block powergating callback
 *
 * Power gating is not handled here for gfx v8; the callback simply
 * reports success regardless of the requested @state.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}
/*
 * gfx_v8_0_set_clockgating_state - IP-block clockgating callback
 *
 * Clock gating is not handled here for gfx v8; the callback simply
 * reports success regardless of the requested @state.
 */
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	return 0;
}
4802 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4806 rptr = ring->adev->wb.wb[ring->rptr_offs];
4811 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4813 struct amdgpu_device *adev = ring->adev;
4816 if (ring->use_doorbell)
4817 /* XXX check if swapping is necessary on BE */
4818 wptr = ring->adev->wb.wb[ring->wptr_offs];
4820 wptr = RREG32(mmCP_RB0_WPTR);
/*
 * gfx_v8_0_ring_set_wptr_gfx - publish the gfx ring write pointer
 *
 * @ring: ring to update
 *
 * Doorbell path: mirror wptr into the writeback slot first, then ring
 * the doorbell (order matters — the CP reads the slot on doorbell).
 * MMIO path: write CP_RB0_WPTR and read it back to flush the write.
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = ring->wptr;
		WDOORBELL32(ring->doorbell_index, ring->wptr);
	} else {
		WREG32(mmCP_RB0_WPTR, ring->wptr);
		/* read back to flush the posted MMIO write */
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring
 *
 * @ring: ring to emit on
 *
 * Emits a WAIT_REG_MEM write/wait/write sequence against the
 * GPU_HDP_FLUSH_REQ/DONE pair.  The done-bit to poll depends on which
 * CP (gfx PFP, or a MEC pipe for compute rings) issues the request.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		/* per-pipe done bits: CP2..CP5 for MEC1, CP6..CP9 for MEC2 */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
/*
 * gfx_v8_0_ring_emit_ib_gfx - schedule an indirect buffer on the gfx ring
 *
 * @ring: gfx ring
 * @ib: indirect buffer to execute
 *
 * Writes the post-execution read pointer to the ring's next_rptr slot,
 * emits a SWITCH_BUFFER on context change, then the INDIRECT_BUFFER
 * (or INDIRECT_BUFFER_CONST for CE IBs) packet.  The next_rptr
 * arithmetic must match the exact number of dwords emitted below.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib)
{
	bool need_ctx_switch = ring->current_ctx != ib->ctx;
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5;

	/* drop the CE preamble IB for the same context */
	if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
		return;

	if (need_ctx_switch)
		next_rptr += 2; /* account for the SWITCH_BUFFER packet */

	next_rptr += 4; /* account for the 4-dword IB packet */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (need_ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* length plus the VMID (if any) in bits 24+ */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
/*
 * gfx_v8_0_ring_emit_ib_compute - schedule an IB on a compute ring
 *
 * @ring: compute ring
 * @ib: indirect buffer to execute
 *
 * Same as the gfx variant but without CE/preamble/context handling:
 * publish the post-execution read pointer, then emit the
 * INDIRECT_BUFFER packet with the VALID bit and VMID in the control
 * dword.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib)
{
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5;

	control |= INDIRECT_BUFFER_VALID;

	next_rptr += 4; /* account for the 4-dword IB packet */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw |
		(ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 *
 * @ring: gfx ring
 * @addr: GPU address to write the fence value to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT / AMDGPU_FENCE_FLAG_INT
 *
 * Emits an EVENT_WRITE_EOP packet that flushes the TC/TCL1 caches and
 * writes @seq (32 or 64 bit per @flags) to @addr, optionally raising
 * an interrupt when it retires.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
/**
 * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring
 *
 * @ring: amdgpu ring buffer object
 * @semaphore: amdgpu semaphore object
 * @emit_wait: Is this a semaphore wait?
 *
 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
 * from running ahead of semaphore waits.  Returns false (forcing the
 * caller back to software fence waits) on ASICs with the VI hardware
 * semaphore bug; true when the packet was emitted.
 */
static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
					 struct amdgpu_semaphore *semaphore,
					 bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	if (ring->adev->asic_type == CHIP_TOPAZ ||
	    ring->adev->asic_type == CHIP_TONGA ||
	    ring->adev->asic_type == CHIP_FIJI)
		/* we got a hw semaphore bug in VI TONGA, return false to switch back to sw fence wait */
		return false;
	else {
		amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 2));
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, sel);
	}

	if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
		/* Prevent the PFP from running ahead of the semaphore wait */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}

	return true;
}
/*
 * gfx_v8_0_ring_emit_vm_flush - flush the VM TLB for one VM context via the CP.
 *
 * Sequence: wait for this ring's latest fence to land in memory (so prior
 * work using the old page tables is done), write the new page-directory
 * base for @vm_id, request a TLB invalidate, then poll until the
 * invalidate request register reads back as done.
 */
5004 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5005 unsigned vm_id, uint64_t pd_addr)
5007 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
/* the fence seq/address this ring last scheduled; used as the wait target */
5008 uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
5009 uint64_t addr = ring->fence_drv.gpu_addr;
/* wait until the fence memory location equals seq */
5011 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5012 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
5013 WAIT_REG_MEM_FUNCTION(3))); /* equal */
5014 amdgpu_ring_write(ring, addr & 0xfffffffc);
5015 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
5016 amdgpu_ring_write(ring, seq);
5017 amdgpu_ring_write(ring, 0xffffffff);
5018 amdgpu_ring_write(ring, 4); /* poll interval */
5021 /* sync CE with ME to prevent the CE fetching the CE IB before the context switch is done */
5022 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5023 amdgpu_ring_write(ring, 0);
5024 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5025 amdgpu_ring_write(ring, 0);
/* write the new page-directory base address register for this VM id */
5028 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5029 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5030 WRITE_DATA_DST_SEL(0)) |
/* contexts 0-7 live in the VM_CONTEXT0 register bank, 8-15 in VM_CONTEXT8 (offset vm_id - 8) */
5033 amdgpu_ring_write(ring,
5034 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
5036 amdgpu_ring_write(ring,
5037 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
5039 amdgpu_ring_write(ring, 0);
/* page-directory base is stored as a page frame number (addr >> 12) */
5040 amdgpu_ring_write(ring, pd_addr >> 12);
5042 /* bits 0-15 are the VM contexts0-15 */
5043 /* invalidate the cache */
5044 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5045 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5046 WRITE_DATA_DST_SEL(0)));
5047 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5048 amdgpu_ring_write(ring, 0);
/* request invalidate for exactly this VM context */
5049 amdgpu_ring_write(ring, 1 << vm_id);
5051 /* wait for the invalidate to complete */
5052 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5053 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5054 WAIT_REG_MEM_FUNCTION(0) | /* always */
5055 WAIT_REG_MEM_ENGINE(0))); /* me */
5056 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5057 amdgpu_ring_write(ring, 0);
5058 amdgpu_ring_write(ring, 0); /* ref */
5059 amdgpu_ring_write(ring, 0); /* mask */
5060 amdgpu_ring_write(ring, 0x20); /* poll interval */
5062 /* compute doesn't have PFP */
5064 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5065 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5066 amdgpu_ring_write(ring, 0x0);
5067 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5068 amdgpu_ring_write(ring, 0);
5069 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5070 amdgpu_ring_write(ring, 0);
5074 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5076 return ring->adev->wb.wb[ring->rptr_offs];
5079 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5081 return ring->adev->wb.wb[ring->wptr_offs];
5084 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5086 struct amdgpu_device *adev = ring->adev;
5088 /* XXX check if swapping is necessary on BE */
5089 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5090 WDOORBELL32(ring->doorbell_index, ring->wptr);
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence packet on a compute ring.
 *
 * Compute rings use a RELEASE_MEM packet (rather than the gfx ring's
 * EVENT_WRITE_EOP) to flush caches, write the fence value @seq to @addr,
 * and optionally raise an interrupt, as selected by the fence flags.
 */
5093 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
5097 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5098 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5100 /* RELEASE_MEM - flush caches, send int */
5101 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
5102 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5104 EOP_TC_WB_ACTION_EN |
5105 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
/* DATA_SEL: 2 = 64-bit seq write, 1 = 32-bit; INT_SEL: 2 raises an interrupt */
5107 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5108 amdgpu_ring_write(ring, addr & 0xfffffffc);
5109 amdgpu_ring_write(ring, upper_32_bits(addr));
5110 amdgpu_ring_write(ring, lower_32_bits(seq));
5111 amdgpu_ring_write(ring, upper_32_bits(seq));
/*
 * gfx_v8_0_set_gfx_eop_interrupt_state - enable/disable the end-of-pipe
 * interrupt for the gfx ring by toggling TIME_STAMP_INT_ENABLE in
 * CP_INT_CNTL_RING0 (read-modify-write).
 */
5114 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5115 enum amdgpu_interrupt_state state)
5120 case AMDGPU_IRQ_STATE_DISABLE:
5121 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5122 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5123 TIME_STAMP_INT_ENABLE, 0);
5124 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5126 case AMDGPU_IRQ_STATE_ENABLE:
5127 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5129 REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5130 TIME_STAMP_INT_ENABLE, 1);
5131 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
/*
 * gfx_v8_0_set_compute_eop_interrupt_state - enable/disable the end-of-pipe
 * interrupt for one MEC pipe by toggling TIME_STAMP_INT_ENABLE in that
 * pipe's CP_MEx_PIPEy_INT_CNTL register.
 */
5138 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5140 enum amdgpu_interrupt_state state)
5142 u32 mec_int_cntl, mec_int_cntl_reg;
5145 * amdgpu controls only pipe 0 of MEC1. That's why this function only
5146 * handles the setting of interrupts for this specific pipe. All other
5147 * pipes' interrupts are set by amdkfd.
5153 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
/* only ME1 pipe 0 is owned by amdgpu; anything else is rejected */
5156 DRM_DEBUG("invalid pipe %d\n", pipe);
5160 DRM_DEBUG("invalid me %d\n", me);
5165 case AMDGPU_IRQ_STATE_DISABLE:
5166 mec_int_cntl = RREG32(mec_int_cntl_reg);
5167 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5168 TIME_STAMP_INT_ENABLE, 0);
5169 WREG32(mec_int_cntl_reg, mec_int_cntl);
5171 case AMDGPU_IRQ_STATE_ENABLE:
5172 mec_int_cntl = RREG32(mec_int_cntl_reg);
5173 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5174 TIME_STAMP_INT_ENABLE, 1);
5175 WREG32(mec_int_cntl_reg, mec_int_cntl);
5182 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5183 struct amdgpu_irq_src *source,
5185 enum amdgpu_interrupt_state state)
5190 case AMDGPU_IRQ_STATE_DISABLE:
5191 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5192 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5193 PRIV_REG_INT_ENABLE, 0);
5194 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5196 case AMDGPU_IRQ_STATE_ENABLE:
5197 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5198 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5199 PRIV_REG_INT_ENABLE, 0);
5200 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
/*
 * gfx_v8_0_set_priv_inst_fault_state - enable/disable the privileged
 * instruction fault interrupt on the gfx ring by toggling
 * PRIV_INSTR_INT_ENABLE in CP_INT_CNTL_RING0 (read-modify-write).
 */
5209 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5210 struct amdgpu_irq_src *source,
5212 enum amdgpu_interrupt_state state)
5217 case AMDGPU_IRQ_STATE_DISABLE:
5218 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5219 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5220 PRIV_INSTR_INT_ENABLE, 0);
5221 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5223 case AMDGPU_IRQ_STATE_ENABLE:
5224 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5225 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5226 PRIV_INSTR_INT_ENABLE, 1);
5227 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
/*
 * gfx_v8_0_set_eop_interrupt_state - dispatch an EOP interrupt state change
 * to the right hardware unit: the gfx ring, or a specific MEC (me) / pipe
 * pair for compute.
 */
5236 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5237 struct amdgpu_irq_src *src,
5239 enum amdgpu_interrupt_state state)
5242 case AMDGPU_CP_IRQ_GFX_EOP:
5243 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
5245 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5246 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5248 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5249 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5251 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5252 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5254 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5255 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5257 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5258 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5260 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5261 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5263 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5264 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5266 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5267 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
/*
 * gfx_v8_0_eop_irq - end-of-pipe interrupt handler.
 *
 * Decodes me/pipe/queue from the IV ring_id, then processes fences on the
 * gfx ring or the matching compute ring.
 */
5275 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
5276 struct amdgpu_irq_src *source,
5277 struct amdgpu_iv_entry *entry)
5280 u8 me_id, pipe_id, queue_id;
5281 struct amdgpu_ring *ring;
5283 DRM_DEBUG("IH: CP EOP\n");
/* ring_id encoding: bits [3:2] = me, [1:0] = pipe, [6:4] = queue */
5284 me_id = (entry->ring_id & 0x0c) >> 2;
5285 pipe_id = (entry->ring_id & 0x03) >> 0;
5286 queue_id = (entry->ring_id & 0x70) >> 4;
5290 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
/* compute: locate the ring matching the decoded me/pipe/queue triple */
5294 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5295 ring = &adev->gfx.compute_ring[i];
5296 /* Per-queue interrupt is supported for MEC starting from VI.
5297 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5299 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5300 amdgpu_fence_process(ring);
/*
 * gfx_v8_0_priv_reg_irq - privileged register fault handler: log the
 * violation and schedule a GPU reset via the device's reset work item.
 */
5307 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
5308 struct amdgpu_irq_src *source,
5309 struct amdgpu_iv_entry *entry)
5311 DRM_ERROR("Illegal register access in command stream\n");
5312 schedule_work(&adev->reset_work);
/*
 * gfx_v8_0_priv_inst_irq - privileged instruction fault handler: log the
 * violation and schedule a GPU reset via the device's reset work item.
 */
5316 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
5317 struct amdgpu_irq_src *source,
5318 struct amdgpu_iv_entry *entry)
5320 DRM_ERROR("Illegal instruction in command stream\n");
5321 schedule_work(&adev->reset_work);
/* amd_ip_funcs vtable wiring the GFX v8 IP block into the amdgpu IP lifecycle. */
5325 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
5326 .early_init = gfx_v8_0_early_init,
5327 .late_init = gfx_v8_0_late_init,
5328 .sw_init = gfx_v8_0_sw_init,
5329 .sw_fini = gfx_v8_0_sw_fini,
5330 .hw_init = gfx_v8_0_hw_init,
5331 .hw_fini = gfx_v8_0_hw_fini,
5332 .suspend = gfx_v8_0_suspend,
5333 .resume = gfx_v8_0_resume,
5334 .is_idle = gfx_v8_0_is_idle,
5335 .wait_for_idle = gfx_v8_0_wait_for_idle,
5336 .soft_reset = gfx_v8_0_soft_reset,
5337 .print_status = gfx_v8_0_print_status,
5338 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
5339 .set_powergating_state = gfx_v8_0_set_powergating_state,
/* Ring callbacks for the gfx ring (EOP fence via EVENT_WRITE_EOP). */
5342 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
5343 .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
5344 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
5345 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
5347 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
5348 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
5349 .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
5350 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5351 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5352 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5353 .test_ring = gfx_v8_0_ring_test_ring,
5354 .test_ib = gfx_v8_0_ring_test_ib,
5355 .insert_nop = amdgpu_ring_insert_nop,
/* Ring callbacks for compute rings (wptr via doorbell, fence via RELEASE_MEM). */
5358 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
5359 .get_rptr = gfx_v8_0_ring_get_rptr_compute,
5360 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
5361 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
5363 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
5364 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
5365 .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
5366 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5367 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5368 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5369 .test_ring = gfx_v8_0_ring_test_ring,
5370 .test_ib = gfx_v8_0_ring_test_ib,
5371 .insert_nop = amdgpu_ring_insert_nop,
5374 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5378 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5379 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5381 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5382 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
/* EOP interrupt source: state programming + handler. */
5385 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
5386 .set = gfx_v8_0_set_eop_interrupt_state,
5387 .process = gfx_v8_0_eop_irq,
/* Privileged register fault interrupt source. */
5390 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
5391 .set = gfx_v8_0_set_priv_reg_fault_state,
5392 .process = gfx_v8_0_priv_reg_irq,
/* Privileged instruction fault interrupt source. */
5395 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
5396 .set = gfx_v8_0_set_priv_inst_fault_state,
5397 .process = gfx_v8_0_priv_inst_irq,
5400 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
5402 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5403 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
5405 adev->gfx.priv_reg_irq.num_types = 1;
5406 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
5408 adev->gfx.priv_inst_irq.num_types = 1;
5409 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
/*
 * gfx_v8_0_set_gds_init - initialize GDS/GWS/OA sizes and their per-client
 * (gfx vs. command submission) partition sizes, based on the total GDS
 * size reported by the hardware.
 */
5412 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
5414 /* init asic gds info */
5415 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5416 adev->gds.gws.total_size = 64;
5417 adev->gds.oa.total_size = 16;
/* 64 KB of GDS: use the smaller per-client partitioning */
5419 if (adev->gds.mem.total_size == 64 * 1024) {
5420 adev->gds.mem.gfx_partition_size = 4096;
5421 adev->gds.mem.cs_partition_size = 4096;
5423 adev->gds.gws.gfx_partition_size = 4;
5424 adev->gds.gws.cs_partition_size = 4;
5426 adev->gds.oa.gfx_partition_size = 4;
5427 adev->gds.oa.cs_partition_size = 1;
/* otherwise (larger GDS) use the bigger partition sizes */
5429 adev->gds.mem.gfx_partition_size = 1024;
5430 adev->gds.mem.cs_partition_size = 1024;
5432 adev->gds.gws.gfx_partition_size = 16;
5433 adev->gds.gws.cs_partition_size = 16;
5435 adev->gds.oa.gfx_partition_size = 4;
5436 adev->gds.oa.cs_partition_size = 4;
/*
 * gfx_v8_0_get_cu_active_bitmap - build the active-CU bitmap for one
 * shader engine / shader array pair.
 *
 * Selects the se/sh via GRBM, reads the fixed and user shader-array
 * config registers (which hold inactive/disabled CU bits), restores
 * broadcast mode, and returns the inverted register value masked to
 * max_cu_per_sh bits — i.e. a 1 bit per active CU.
 */
5440 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev,
5443 u32 mask = 0, tmp, tmp1;
5446 gfx_v8_0_select_se_sh(adev, se, sh);
5447 tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5448 tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
/* 0xffffffff/0xffffffff restores GRBM broadcast (all SEs/SHs) */
5449 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
/* build a mask of max_cu_per_sh low bits */
5456 for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
5461 return (~tmp) & mask;
/*
 * gfx_v8_0_get_cu_info - populate @cu_info with the per-SE/SH active-CU
 * bitmaps, the total number of active CUs, and the always-on CU mask.
 *
 * Holds grbm_idx_mutex because gfx_v8_0_get_cu_active_bitmap() changes
 * the GRBM se/sh selection. Returns early if @adev or @cu_info is NULL.
 */
5464 int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5465 struct amdgpu_cu_info *cu_info)
5467 int i, j, k, counter, active_cu_number = 0;
5468 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5470 if (!adev || !cu_info)
5473 mutex_lock(&adev->grbm_idx_mutex);
5474 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5475 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5479 bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j);
5480 cu_info->bitmap[i][j] = bitmap;
/* count active CUs for this SE/SH and collect the always-on subset */
5482 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
5483 if (bitmap & mask) {
5490 active_cu_number += counter;
/* pack per-SE/SH always-on bits: 16 bits per SE, 8 per SH */
5491 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5495 cu_info->number = active_cu_number;
5496 cu_info->ao_cu_mask = ao_cu_mask;
5497 mutex_unlock(&adev->grbm_idx_mutex);