/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
23 #include <linux/firmware.h>
26 #include "amdgpu_gfx.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
44 #include "gca/gfx_8_0_enum.h"
46 #include "uvd/uvd_5_0_d.h"
47 #include "uvd/uvd_5_0_sh_mask.h"
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
52 #define GFX8_NUM_GFX_RINGS 1
53 #define GFX8_NUM_COMPUTE_RINGS 8
55 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
56 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
59 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
60 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
61 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
62 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
63 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
64 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
65 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
66 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
67 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
69 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
70 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
71 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
72 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
73 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
74 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
76 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
77 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
78 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
79 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
80 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
82 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
83 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
85 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
86 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
87 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
89 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
90 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
91 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
92 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
93 MODULE_FIRMWARE("amdgpu/topaz_mec2.bin");
94 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
96 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
97 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
99 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
100 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
101 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
103 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
105 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
106 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
107 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
108 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
109 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
110 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
111 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
112 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
113 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
114 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
115 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
116 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
117 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
118 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
119 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
120 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
123 static const u32 golden_settings_tonga_a11[] =
125 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
126 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
127 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
128 mmGB_GPU_ID, 0x0000000f, 0x00000000,
129 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
130 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
131 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
132 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
133 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
134 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
135 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
136 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
137 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
138 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
139 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
142 static const u32 tonga_golden_common_all[] =
144 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
145 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
146 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
147 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
148 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
149 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
150 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
151 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
154 static const u32 tonga_mgcg_cgcg_init[] =
156 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
157 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
158 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
159 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
160 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
161 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
162 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
163 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
164 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
165 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
166 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
167 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
168 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
169 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
170 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
171 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
172 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
173 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
174 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
175 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
176 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
177 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
178 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
179 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
180 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
181 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
182 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
183 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
184 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
185 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
186 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
187 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
188 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
189 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
190 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
191 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
192 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
193 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
194 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
195 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
196 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
197 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
198 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
199 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
200 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
201 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
202 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
203 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
204 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
205 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
206 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
207 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
208 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
209 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
210 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
211 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
212 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
213 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
214 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
215 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
216 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
217 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
218 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
219 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
220 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
221 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
222 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
223 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
224 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
225 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
226 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
227 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
228 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
229 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
230 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
233 static const u32 fiji_golden_common_all[] =
235 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
236 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
237 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
238 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
239 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
240 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
241 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
242 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
243 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
244 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
247 static const u32 golden_settings_fiji_a10[] =
249 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
250 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
251 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
252 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
253 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
254 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
255 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
256 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
257 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
258 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
259 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
262 static const u32 fiji_mgcg_cgcg_init[] =
264 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
265 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
266 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
267 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
268 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
269 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
270 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
271 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
272 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
273 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
274 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
275 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
276 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
277 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
278 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
279 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
280 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
281 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
282 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
283 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
284 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
285 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
286 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
287 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
288 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
289 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
290 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
291 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
292 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
293 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
294 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
295 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
296 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
297 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
298 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
301 static const u32 golden_settings_iceland_a11[] =
303 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
304 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
305 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
306 mmGB_GPU_ID, 0x0000000f, 0x00000000,
307 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
308 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
309 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
310 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
311 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
312 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
313 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
314 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
315 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
316 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
317 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
320 static const u32 iceland_golden_common_all[] =
322 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
323 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
324 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
325 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
326 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
327 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
328 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
329 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
332 static const u32 iceland_mgcg_cgcg_init[] =
334 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
335 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
336 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
337 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
338 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
339 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
340 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
341 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
342 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
343 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
344 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
345 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
346 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
347 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
348 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
349 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
350 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
351 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
352 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
353 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
354 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
355 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
356 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
357 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
358 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
359 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
360 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
361 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
362 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
363 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
364 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
365 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
366 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
367 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
368 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
369 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
370 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
371 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
372 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
373 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
374 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
375 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
376 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
377 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
378 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
379 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
380 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
381 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
382 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
383 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
384 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
385 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
386 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
387 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
388 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
389 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
390 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
391 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
392 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
393 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
394 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
395 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
396 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
397 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
400 static const u32 cz_golden_settings_a11[] =
402 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
403 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
404 mmGB_GPU_ID, 0x0000000f, 0x00000000,
405 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
406 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
407 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
408 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
409 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
410 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
411 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
414 static const u32 cz_golden_common_all[] =
416 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
417 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
418 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
419 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
420 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
421 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
422 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
423 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
426 static const u32 cz_mgcg_cgcg_init[] =
428 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
429 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
430 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
431 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
432 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
433 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
434 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
435 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
436 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
437 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
438 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
439 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
440 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
441 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
442 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
443 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
444 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
445 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
446 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
447 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
448 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
449 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
450 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
451 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
452 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
453 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
454 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
455 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
456 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
457 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
458 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
459 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
460 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
461 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
462 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
463 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
464 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
465 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
466 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
467 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
468 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
469 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
470 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
471 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
472 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
473 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
474 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
475 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
476 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
477 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
478 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
479 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
480 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
481 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
482 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
483 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
484 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
485 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
486 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
487 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
488 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
489 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
490 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
491 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
492 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
493 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
494 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
495 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
496 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
497 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
498 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
499 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
500 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
501 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
502 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
505 static const u32 stoney_golden_settings_a11[] =
507 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
508 mmGB_GPU_ID, 0x0000000f, 0x00000000,
509 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
510 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
511 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
512 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
513 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
514 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
515 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
516 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
519 static const u32 stoney_golden_common_all[] =
521 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
522 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
523 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
524 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
525 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
526 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
527 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
528 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
531 static const u32 stoney_mgcg_cgcg_init[] =
533 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
534 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
535 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
536 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
537 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
538 mmATC_MISC_CG, 0xffffffff, 0x000c0200,
541 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
542 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
543 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
545 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
547 switch (adev->asic_type) {
549 amdgpu_program_register_sequence(adev,
550 iceland_mgcg_cgcg_init,
551 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
552 amdgpu_program_register_sequence(adev,
553 golden_settings_iceland_a11,
554 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
555 amdgpu_program_register_sequence(adev,
556 iceland_golden_common_all,
557 (const u32)ARRAY_SIZE(iceland_golden_common_all));
560 amdgpu_program_register_sequence(adev,
562 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
563 amdgpu_program_register_sequence(adev,
564 golden_settings_fiji_a10,
565 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
566 amdgpu_program_register_sequence(adev,
567 fiji_golden_common_all,
568 (const u32)ARRAY_SIZE(fiji_golden_common_all));
572 amdgpu_program_register_sequence(adev,
573 tonga_mgcg_cgcg_init,
574 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
575 amdgpu_program_register_sequence(adev,
576 golden_settings_tonga_a11,
577 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
578 amdgpu_program_register_sequence(adev,
579 tonga_golden_common_all,
580 (const u32)ARRAY_SIZE(tonga_golden_common_all));
583 amdgpu_program_register_sequence(adev,
585 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
586 amdgpu_program_register_sequence(adev,
587 cz_golden_settings_a11,
588 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
589 amdgpu_program_register_sequence(adev,
590 cz_golden_common_all,
591 (const u32)ARRAY_SIZE(cz_golden_common_all));
594 amdgpu_program_register_sequence(adev,
595 stoney_mgcg_cgcg_init,
596 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
597 amdgpu_program_register_sequence(adev,
598 stoney_golden_settings_a11,
599 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
600 amdgpu_program_register_sequence(adev,
601 stoney_golden_common_all,
602 (const u32)ARRAY_SIZE(stoney_golden_common_all));
609 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
613 adev->gfx.scratch.num_reg = 7;
614 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
615 for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
616 adev->gfx.scratch.free[i] = true;
617 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
621 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
623 struct amdgpu_device *adev = ring->adev;
629 r = amdgpu_gfx_scratch_get(adev, &scratch);
631 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
634 WREG32(scratch, 0xCAFEDEAD);
635 r = amdgpu_ring_lock(ring, 3);
637 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
639 amdgpu_gfx_scratch_free(adev, scratch);
642 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
643 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
644 amdgpu_ring_write(ring, 0xDEADBEEF);
645 amdgpu_ring_unlock_commit(ring);
647 for (i = 0; i < adev->usec_timeout; i++) {
648 tmp = RREG32(scratch);
649 if (tmp == 0xDEADBEEF)
653 if (i < adev->usec_timeout) {
654 DRM_INFO("ring test on %d succeeded in %d usecs\n",
657 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
658 ring->idx, scratch, tmp);
661 amdgpu_gfx_scratch_free(adev, scratch);
665 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
667 struct amdgpu_device *adev = ring->adev;
669 struct fence *f = NULL;
675 r = amdgpu_gfx_scratch_get(adev, &scratch);
677 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
680 WREG32(scratch, 0xCAFEDEAD);
681 memset(&ib, 0, sizeof(ib));
682 r = amdgpu_ib_get(ring, NULL, 256, &ib);
684 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
687 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
688 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
689 ib.ptr[2] = 0xDEADBEEF;
692 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
693 AMDGPU_FENCE_OWNER_UNDEFINED,
698 r = fence_wait(f, false);
700 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
703 for (i = 0; i < adev->usec_timeout; i++) {
704 tmp = RREG32(scratch);
705 if (tmp == 0xDEADBEEF)
709 if (i < adev->usec_timeout) {
710 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
714 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
720 amdgpu_ib_free(adev, &ib);
722 amdgpu_gfx_scratch_free(adev, scratch);
726 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
728 const char *chip_name;
731 struct amdgpu_firmware_info *info = NULL;
732 const struct common_firmware_header *header = NULL;
733 const struct gfx_firmware_header_v1_0 *cp_hdr;
737 switch (adev->asic_type) {
745 chip_name = "carrizo";
751 chip_name = "stoney";
757 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
758 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
761 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
764 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
765 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
766 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
768 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
769 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
772 err = amdgpu_ucode_validate(adev->gfx.me_fw);
775 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
776 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
777 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
779 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
780 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
783 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
786 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
787 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
788 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
790 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
791 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
794 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
795 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
796 adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
797 adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
799 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
800 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
803 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
806 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
807 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
808 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
810 if (adev->asic_type != CHIP_STONEY) {
811 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
812 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
814 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
817 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
818 adev->gfx.mec2_fw->data;
819 adev->gfx.mec2_fw_version =
820 le32_to_cpu(cp_hdr->header.ucode_version);
821 adev->gfx.mec2_feature_version =
822 le32_to_cpu(cp_hdr->ucode_feature_version);
825 adev->gfx.mec2_fw = NULL;
829 if (adev->firmware.smu_load) {
830 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
831 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
832 info->fw = adev->gfx.pfp_fw;
833 header = (const struct common_firmware_header *)info->fw->data;
834 adev->firmware.fw_size +=
835 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
837 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
838 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
839 info->fw = adev->gfx.me_fw;
840 header = (const struct common_firmware_header *)info->fw->data;
841 adev->firmware.fw_size +=
842 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
844 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
845 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
846 info->fw = adev->gfx.ce_fw;
847 header = (const struct common_firmware_header *)info->fw->data;
848 adev->firmware.fw_size +=
849 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
851 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
852 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
853 info->fw = adev->gfx.rlc_fw;
854 header = (const struct common_firmware_header *)info->fw->data;
855 adev->firmware.fw_size +=
856 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
858 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
859 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
860 info->fw = adev->gfx.mec_fw;
861 header = (const struct common_firmware_header *)info->fw->data;
862 adev->firmware.fw_size +=
863 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
865 if (adev->gfx.mec2_fw) {
866 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
867 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
868 info->fw = adev->gfx.mec2_fw;
869 header = (const struct common_firmware_header *)info->fw->data;
870 adev->firmware.fw_size +=
871 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
879 "gfx8: Failed to load firmware \"%s\"\n",
881 release_firmware(adev->gfx.pfp_fw);
882 adev->gfx.pfp_fw = NULL;
883 release_firmware(adev->gfx.me_fw);
884 adev->gfx.me_fw = NULL;
885 release_firmware(adev->gfx.ce_fw);
886 adev->gfx.ce_fw = NULL;
887 release_firmware(adev->gfx.rlc_fw);
888 adev->gfx.rlc_fw = NULL;
889 release_firmware(adev->gfx.mec_fw);
890 adev->gfx.mec_fw = NULL;
891 release_firmware(adev->gfx.mec2_fw);
892 adev->gfx.mec2_fw = NULL;
/*
 * gfx_v8_0_mec_fini - tear down the MEC HPD EOP buffer object.
 *
 * Reserves, unpins, unreserves and finally drops the last reference on
 * adev->gfx.mec.hpd_eop_obj, then clears the pointer so a later
 * gfx_v8_0_mec_init() sees it as unallocated.
 *
 * NOTE(review): this chunk is a lossy extraction — braces and some
 * statements are missing from the visible text; code left byte-identical.
 */
897 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
901 if (adev->gfx.mec.hpd_eop_obj) {
902 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
903 if (unlikely(r != 0))
/* reserve failure is only warned about; teardown continues regardless */
904 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
905 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
906 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
908 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
/* defend against double-free / use-after-free on a repeat fini */
909 adev->gfx.mec.hpd_eop_obj = NULL;
913 #define MEC_HPD_SIZE 2048
/*
 * gfx_v8_0_mec_init - allocate and initialize the MEC HPD EOP buffer.
 *
 * Configures 1 MEC with 1 pipe (8 queues per pipe, so num_queue =
 * num_mec * num_pipe * 8), then creates, pins and kmaps a GTT buffer
 * object of num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes, zeroes it and
 * unmaps/unreserves it again.  On any failure after the BO exists the
 * partially-built state is unwound via gfx_v8_0_mec_fini().
 *
 * NOTE(review): the warning strings say "HDP EOP" where the code deals
 * with the HPD EOP buffer — looks like a typo in the original messages;
 * runtime strings are intentionally left untouched here.
 * NOTE(review): lossy extraction — some error-check/return lines are not
 * visible; code left byte-identical.
 */
915 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
921 * we assign only 1 pipe because all other pipes will
924 adev->gfx.mec.num_mec = 1;
925 adev->gfx.mec.num_pipe = 1;
926 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
928 if (adev->gfx.mec.hpd_eop_obj == NULL) {
929 r = amdgpu_bo_create(adev,
/* 2048 bytes of HPD space per pipe, doubled — sizing mirrors the memset below */
930 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
932 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
933 &adev->gfx.mec.hpd_eop_obj);
935 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
940 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
941 if (unlikely(r != 0)) {
942 gfx_v8_0_mec_fini(adev);
/* pin into GTT and record the GPU address used when programming queues */
945 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
946 &adev->gfx.mec.hpd_eop_gpu_addr);
948 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
949 gfx_v8_0_mec_fini(adev);
952 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
954 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
955 gfx_v8_0_mec_fini(adev);
/* clear the whole HPD EOP area before the CP ever reads it */
959 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
961 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
962 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/*
 * Hand-encoded GCN compute shader used by the EDC GPR workaround
 * (copied into the IB at vgpr_offset by gfx_v8_0_do_edc_gpr_workarounds).
 * Presumably a run of v_mov_b32 instructions that touches a large span of
 * VGPRs, terminated by what look like s_barrier/s_endpgm encodings
 * (0xbf8a0000, 0xbf810000) — opcode decoding not verified here, see the
 * GCN3 ISA manual.
 */
967 static const u32 vgpr_init_compute_shader[] =
969 0x7e000209, 0x7e020208,
970 0x7e040207, 0x7e060206,
971 0x7e080205, 0x7e0a0204,
972 0x7e0c0203, 0x7e0e0202,
973 0x7e100201, 0x7e120200,
974 0x7e140209, 0x7e160208,
975 0x7e180207, 0x7e1a0206,
976 0x7e1c0205, 0x7e1e0204,
977 0x7e200203, 0x7e220202,
978 0x7e240201, 0x7e260200,
979 0x7e280209, 0x7e2a0208,
980 0x7e2c0207, 0x7e2e0206,
981 0x7e300205, 0x7e320204,
982 0x7e340203, 0x7e360202,
983 0x7e380201, 0x7e3a0200,
984 0x7e3c0209, 0x7e3e0208,
985 0x7e400207, 0x7e420206,
986 0x7e440205, 0x7e460204,
987 0x7e480203, 0x7e4a0202,
988 0x7e4c0201, 0x7e4e0200,
989 0x7e500209, 0x7e520208,
990 0x7e540207, 0x7e560206,
991 0x7e580205, 0x7e5a0204,
992 0x7e5c0203, 0x7e5e0202,
993 0x7e600201, 0x7e620200,
994 0x7e640209, 0x7e660208,
995 0x7e680207, 0x7e6a0206,
996 0x7e6c0205, 0x7e6e0204,
997 0x7e700203, 0x7e720202,
998 0x7e740201, 0x7e760200,
999 0x7e780209, 0x7e7a0208,
1000 0x7e7c0207, 0x7e7e0206,
1001 0xbf8a0000, 0xbf810000,
/*
 * Hand-encoded GCN compute shader used by both SGPR passes of the EDC
 * GPR workaround (copied into the IB at sgpr_offset and dispatched twice
 * with different COMPUTE_STATIC_THREAD_MGMT_SE0 masks).  Presumably a
 * sequence of scalar-register move encodings ending in s_barrier/s_endpgm
 * (0xbf8a0000, 0xbf810000) plus a zero pad dword — not decoded here,
 * see the GCN3 ISA manual.
 */
1004 static const u32 sgpr_init_compute_shader[] =
1006 0xbe8a0100, 0xbe8c0102,
1007 0xbe8e0104, 0xbe900106,
1008 0xbe920108, 0xbe940100,
1009 0xbe960102, 0xbe980104,
1010 0xbe9a0106, 0xbe9c0108,
1011 0xbe9e0100, 0xbea00102,
1012 0xbea20104, 0xbea40106,
1013 0xbea60108, 0xbea80100,
1014 0xbeaa0102, 0xbeac0104,
1015 0xbeae0106, 0xbeb00108,
1016 0xbeb20100, 0xbeb40102,
1017 0xbeb60104, 0xbeb80106,
1018 0xbeba0108, 0xbebc0100,
1019 0xbebe0102, 0xbec00104,
1020 0xbec20106, 0xbec40108,
1021 0xbec60100, 0xbec80102,
1022 0xbee60004, 0xbee70005,
1023 0xbeea0006, 0xbeeb0007,
1024 0xbee80008, 0xbee90009,
1025 0xbefc0000, 0xbf8a0000,
1026 0xbf810000, 0x00000000,
/*
 * Register/value pairs programmed via PACKET3_SET_SH_REG before the VGPR
 * dispatch of the EDC workaround (consumed two-at-a-time by the i += 2
 * loop in gfx_v8_0_do_edc_gpr_workarounds): all SEs enabled
 * (SE0 mask 0xffffffff), 256*4 threads in X, and marker values
 * 0xedcedc00..0xedcedc09 in the USER_DATA registers.
 */
1029 static const u32 vgpr_init_regs[] =
1031 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1032 mmCOMPUTE_RESOURCE_LIMITS, 0,
1033 mmCOMPUTE_NUM_THREAD_X, 256*4,
1034 mmCOMPUTE_NUM_THREAD_Y, 1,
1035 mmCOMPUTE_NUM_THREAD_Z, 1,
1036 mmCOMPUTE_PGM_RSRC2, 20,
1037 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1038 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1039 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1040 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1041 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1042 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1043 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1044 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1045 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1046 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * Register/value pairs for the FIRST SGPR dispatch of the EDC workaround.
 * Differs from vgpr_init_regs in the SE0 thread-mgmt mask (0x0f — lower
 * CU group), RESOURCE_LIMITS (0x1000000) and NUM_THREAD_X (256*5);
 * USER_DATA marker values are the same 0xedcedc0x pattern.
 */
1049 static const u32 sgpr1_init_regs[] =
1051 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1052 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1053 mmCOMPUTE_NUM_THREAD_X, 256*5,
1054 mmCOMPUTE_NUM_THREAD_Y, 1,
1055 mmCOMPUTE_NUM_THREAD_Z, 1,
1056 mmCOMPUTE_PGM_RSRC2, 20,
1057 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1058 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1059 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1060 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1061 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1062 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1063 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1064 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1065 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1066 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * Register/value pairs for the SECOND SGPR dispatch of the EDC
 * workaround.  Identical to sgpr1_init_regs except the SE0 thread-mgmt
 * mask is 0xf0 (the complementary CU group), so together the two SGPR
 * passes cover both halves.
 */
1069 static const u32 sgpr2_init_regs[] =
1071 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1072 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1073 mmCOMPUTE_NUM_THREAD_X, 256*5,
1074 mmCOMPUTE_NUM_THREAD_Y, 1,
1075 mmCOMPUTE_NUM_THREAD_Z, 1,
1076 mmCOMPUTE_PGM_RSRC2, 20,
1077 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1078 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1079 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1080 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1081 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1082 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1083 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1084 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1085 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1086 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * SEC/DED (single-error-correct / double-error-detect) EDC counter
 * registers.  gfx_v8_0_do_edc_gpr_workarounds reads each one back at the
 * end purely to clear the counters.
 * NOTE(review): lossy extraction — the visible list is only a subset of
 * the original table (line numbers jump); left byte-identical.
 */
1089 static const u32 sec_ded_counter_registers[] =
1092 mmCPC_EDC_SCRATCH_CNT,
1093 mmCPC_EDC_UCODE_CNT,
1100 mmDC_EDC_CSINVOC_CNT,
1101 mmDC_EDC_RESTORE_CNT,
1107 mmSQC_ATC_EDC_GATCL1_CNT,
1113 mmTCP_ATC_EDC_GATCL1_CNT,
/*
 * gfx_v8_0_do_edc_gpr_workarounds - initialize GPRs so EDC can be enabled.
 *
 * Carrizo-only (bails for any other asic_type).  Builds a single
 * indirect buffer on compute ring 0 that:
 *   1. copies the VGPR- and SGPR-init shaders into the IB at aligned
 *      offsets (vgpr_offset / sgpr_offset),
 *   2. emits three SET_SH_REG + COMPUTE_PGM_LO/HI + DISPATCH_DIRECT +
 *      CS-partial-flush sequences — one VGPR pass (vgpr_init_regs) and
 *      two SGPR passes (sgpr1_init_regs / sgpr2_init_regs, each hitting
 *      a different CU mask),
 *   3. submits the IB, waits on its fence, then programs GB_EDC_MODE
 *      (DED_MODE=2, PROP_FED=1) and CC_GC_EDC_CONFIG, and finally reads
 *      back every sec_ded_counter_registers entry to clear the counters.
 *
 * NOTE(review): lossy extraction — several error paths, 'goto fail'
 * labels and the final return are not visible; code left byte-identical.
 */
1118 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1120 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1121 struct amdgpu_ib ib;
1122 struct fence *f = NULL;
1125 unsigned total_size, vgpr_offset, sgpr_offset;
1128 /* only supported on CZ */
1129 if (adev->asic_type != CHIP_CARRIZO)
1132 /* bail if the compute ring is not ready */
/* save EDC mode so it can be restored/updated after the dispatches */
1136 tmp = RREG32(mmGB_EDC_MODE);
1137 WREG32(mmGB_EDC_MODE, 0);
/* per-pass size: (regs/2 pairs * 3 dw) + 4 dw PGM_LO/HI + 5 dw dispatch + 2 dw flush */
1140 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1142 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1144 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
/* shader code lives in the same IB, 256-byte aligned after the packets */
1145 total_size = ALIGN(total_size, 256);
1146 vgpr_offset = total_size;
1147 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1148 sgpr_offset = total_size;
1149 total_size += sizeof(sgpr_init_compute_shader);
1151 /* allocate an indirect buffer to put the commands in */
1152 memset(&ib, 0, sizeof(ib));
1153 r = amdgpu_ib_get(ring, NULL, total_size, &ib);
1155 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1159 /* load the compute shaders */
1160 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1161 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1163 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1164 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1166 /* init the ib length to 0 */
/* ---- pass 1: VGPR init ---- */
1170 /* write the register state for the compute dispatch */
1171 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1172 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1173 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1174 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1176 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1177 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1178 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1179 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1180 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1181 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1183 /* write dispatch packet */
1184 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1185 ib.ptr[ib.length_dw++] = 8; /* x */
1186 ib.ptr[ib.length_dw++] = 1; /* y */
1187 ib.ptr[ib.length_dw++] = 1; /* z */
1188 ib.ptr[ib.length_dw++] =
1189 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1191 /* write CS partial flush packet */
1192 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1193 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- pass 2: SGPR init, first CU mask (sgpr1_init_regs) ---- */
1196 /* write the register state for the compute dispatch */
1197 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1198 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1199 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1200 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1202 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1203 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1204 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1205 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1206 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1207 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1209 /* write dispatch packet */
1210 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1211 ib.ptr[ib.length_dw++] = 8; /* x */
1212 ib.ptr[ib.length_dw++] = 1; /* y */
1213 ib.ptr[ib.length_dw++] = 1; /* z */
1214 ib.ptr[ib.length_dw++] =
1215 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1217 /* write CS partial flush packet */
1218 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1219 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- pass 3: SGPR init, complementary CU mask (sgpr2_init_regs) ---- */
1222 /* write the register state for the compute dispatch */
1223 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1224 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1225 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1226 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1228 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1229 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1230 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1231 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1232 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1233 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1235 /* write dispatch packet */
1236 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1237 ib.ptr[ib.length_dw++] = 8; /* x */
1238 ib.ptr[ib.length_dw++] = 1; /* y */
1239 ib.ptr[ib.length_dw++] = 1; /* z */
1240 ib.ptr[ib.length_dw++] =
1241 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1243 /* write CS partial flush packet */
1244 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1245 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1247 /* shedule the ib on the ring */
1248 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
1249 AMDGPU_FENCE_OWNER_UNDEFINED,
1252 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1256 /* wait for the GPU to finish processing the IB */
1257 r = fence_wait(f, false);
1259 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
/* GPRs are now initialized — safe to turn EDC modes on */
1263 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1264 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1265 WREG32(mmGB_EDC_MODE, tmp);
1267 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1268 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1269 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1272 /* read back registers to clear the counters */
1273 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1274 RREG32(sec_ded_counter_registers[i]);
1278 amdgpu_ib_free(adev, &ib);
/*
 * gfx_v8_0_gpu_early_init - fill in adev->gfx.config for the detected ASIC.
 *
 * Per-ASIC switch sets shader-engine / tile-pipe / CU / backend counts
 * and the FIFO sizes, and picks a golden GB_ADDR_CONFIG value
 * (TOPAZ/TONGA/CARRIZO variants).  Some APU entries further refine
 * max_cu_per_sh from adev->pdev->revision.  Afterwards it reads
 * MC_SHARED_CHMAP / MC_ARB_RAMCFG, derives mem_row_size_in_kb — from the
 * fused DIMM address maps on APUs (8GB map → 2KB row), from NOOFCOLS
 * otherwise — and folds the row size back into GB_ADDR_CONFIG's ROW_SIZE
 * field before storing it in adev->gfx.config.gb_addr_config.
 *
 * NOTE(review): lossy extraction — the 'case CHIP_*:' labels, 'break;'
 * lines and some assignments are not visible, so which arm belongs to
 * which chip cannot be confirmed from this text alone; code left
 * byte-identical.
 */
1283 static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1286 u32 mc_shared_chmap, mc_arb_ramcfg;
1287 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1290 switch (adev->asic_type) {
/* arm 1 (TOPAZ golden config below) */
1292 adev->gfx.config.max_shader_engines = 1;
1293 adev->gfx.config.max_tile_pipes = 2;
1294 adev->gfx.config.max_cu_per_sh = 6;
1295 adev->gfx.config.max_sh_per_se = 1;
1296 adev->gfx.config.max_backends_per_se = 2;
1297 adev->gfx.config.max_texture_channel_caches = 2;
1298 adev->gfx.config.max_gprs = 256;
1299 adev->gfx.config.max_gs_threads = 32;
1300 adev->gfx.config.max_hw_contexts = 8;
1302 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1303 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1304 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1305 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1306 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
/* arm 2: big dGPU config (16 tile pipes, TONGA golden) */
1309 adev->gfx.config.max_shader_engines = 4;
1310 adev->gfx.config.max_tile_pipes = 16;
1311 adev->gfx.config.max_cu_per_sh = 16;
1312 adev->gfx.config.max_sh_per_se = 1;
1313 adev->gfx.config.max_backends_per_se = 4;
1314 adev->gfx.config.max_texture_channel_caches = 16;
1315 adev->gfx.config.max_gprs = 256;
1316 adev->gfx.config.max_gs_threads = 32;
1317 adev->gfx.config.max_hw_contexts = 8;
1319 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1320 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1321 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1322 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1323 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* arm 3: mid dGPU config (8 tile pipes, TONGA golden) */
1326 adev->gfx.config.max_shader_engines = 4;
1327 adev->gfx.config.max_tile_pipes = 8;
1328 adev->gfx.config.max_cu_per_sh = 8;
1329 adev->gfx.config.max_sh_per_se = 1;
1330 adev->gfx.config.max_backends_per_se = 2;
1331 adev->gfx.config.max_texture_channel_caches = 8;
1332 adev->gfx.config.max_gprs = 256;
1333 adev->gfx.config.max_gs_threads = 32;
1334 adev->gfx.config.max_hw_contexts = 8;
1336 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1337 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1338 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1339 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1340 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* arm 4: APU — CU count depends on PCI revision (CARRIZO golden) */
1343 adev->gfx.config.max_shader_engines = 1;
1344 adev->gfx.config.max_tile_pipes = 2;
1345 adev->gfx.config.max_sh_per_se = 1;
1346 adev->gfx.config.max_backends_per_se = 2;
1348 switch (adev->pdev->revision) {
1356 adev->gfx.config.max_cu_per_sh = 8;
1366 adev->gfx.config.max_cu_per_sh = 6;
1373 adev->gfx.config.max_cu_per_sh = 6;
1382 adev->gfx.config.max_cu_per_sh = 4;
1386 adev->gfx.config.max_texture_channel_caches = 2;
1387 adev->gfx.config.max_gprs = 256;
1388 adev->gfx.config.max_gs_threads = 32;
1389 adev->gfx.config.max_hw_contexts = 8;
1391 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1392 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1393 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1394 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1395 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
/* arm 5: small APU — revision-dependent 2 or 3 CUs (CARRIZO golden) */
1398 adev->gfx.config.max_shader_engines = 1;
1399 adev->gfx.config.max_tile_pipes = 2;
1400 adev->gfx.config.max_sh_per_se = 1;
1401 adev->gfx.config.max_backends_per_se = 1;
1403 switch (adev->pdev->revision) {
1410 adev->gfx.config.max_cu_per_sh = 3;
1416 adev->gfx.config.max_cu_per_sh = 2;
1420 adev->gfx.config.max_texture_channel_caches = 2;
1421 adev->gfx.config.max_gprs = 256;
1422 adev->gfx.config.max_gs_threads = 16;
1423 adev->gfx.config.max_hw_contexts = 8;
1425 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1426 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1427 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1428 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1429 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
/* arm 6 / default: 2-SE config (TONGA golden) */
1432 adev->gfx.config.max_shader_engines = 2;
1433 adev->gfx.config.max_tile_pipes = 4;
1434 adev->gfx.config.max_cu_per_sh = 2;
1435 adev->gfx.config.max_sh_per_se = 1;
1436 adev->gfx.config.max_backends_per_se = 2;
1437 adev->gfx.config.max_texture_channel_caches = 4;
1438 adev->gfx.config.max_gprs = 256;
1439 adev->gfx.config.max_gs_threads = 32;
1440 adev->gfx.config.max_hw_contexts = 8;
1442 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1443 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1444 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1445 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1446 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1450 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1451 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1452 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1454 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1455 adev->gfx.config.mem_max_burst_length_bytes = 256;
1456 if (adev->flags & AMD_IS_APU) {
1457 /* Get memory bank mapping mode. */
1458 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1459 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1460 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1462 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1463 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1464 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1466 /* Validate settings in case only one DIMM installed. */
/* maps 0, 3, 4 and >12 are treated as "no/invalid DIMM" and zeroed */
1467 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1468 dimm00_addr_map = 0;
1469 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1470 dimm01_addr_map = 0;
1471 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1472 dimm10_addr_map = 0;
1473 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1474 dimm11_addr_map = 0;
1476 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1477 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1478 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1479 adev->gfx.config.mem_row_size_in_kb = 2;
1481 adev->gfx.config.mem_row_size_in_kb = 1;
/* dGPU path: derive row size from NOOFCOLS, clamped to 4KB */
1483 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1484 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1485 if (adev->gfx.config.mem_row_size_in_kb > 4)
1486 adev->gfx.config.mem_row_size_in_kb = 4;
1489 adev->gfx.config.shader_engine_tile_size = 32;
1490 adev->gfx.config.num_gpus = 1;
1491 adev->gfx.config.multi_gpu_tile_size = 64;
1493 /* fix up row size */
1494 switch (adev->gfx.config.mem_row_size_in_kb) {
1497 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1500 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1503 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1506 adev->gfx.config.gb_addr_config = gb_addr_config;
/*
 * gfx_v8_0_sw_init - software-side setup for the GFX8 IP block.
 *
 * Registers the EOP (src 181), privileged-register (184) and
 * privileged-instruction (185) interrupt sources, loads the microcode,
 * initializes the MEC HPD buffer, creates the gfx ring (doorbell on all
 * chips except Topaz) and the compute rings (all on MEC 1, doorbell
 * MEC_RING0 + i, queue = i % 8), reserves the GDS/GWS/OA buffer objects,
 * sets ce_ram_size and finally calls gfx_v8_0_gpu_early_init().
 *
 * NOTE(review): lossy extraction — error-return lines after each step
 * are not visible; code left byte-identical.
 */
1509 static int gfx_v8_0_sw_init(void *handle)
1512 struct amdgpu_ring *ring;
1513 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* EOP Event */
1516 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1520 /* Privileged reg */
1521 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1525 /* Privileged inst */
1526 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1530 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1532 gfx_v8_0_scratch_init(adev);
1534 r = gfx_v8_0_init_microcode(adev);
1536 DRM_ERROR("Failed to load gfx firmware!\n");
1540 r = gfx_v8_0_mec_init(adev);
1542 DRM_ERROR("Failed to init MEC BOs!\n");
1546 /* set up the gfx ring */
1547 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1548 ring = &adev->gfx.gfx_ring[i];
1549 ring->ring_obj = NULL;
1550 sprintf(ring->name, "gfx");
1551 /* no gfx doorbells on iceland */
1552 if (adev->asic_type != CHIP_TOPAZ) {
1553 ring->use_doorbell = true;
1554 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1557 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1558 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1559 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1560 AMDGPU_RING_TYPE_GFX);
1565 /* set up the compute queues */
1566 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1569 /* max 32 queues per MEC */
1570 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1571 DRM_ERROR("Too many (%d) compute rings!\n", i);
1574 ring = &adev->gfx.compute_ring[i];
1575 ring->ring_obj = NULL;
1576 ring->use_doorbell = true;
1577 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1578 ring->me = 1; /* first MEC */
1580 ring->queue = i % 8;
1581 sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
1582 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1583 /* type-2 packets are deprecated on MEC, use type-3 instead */
1584 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1585 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1586 &adev->gfx.eop_irq, irq_type,
1587 AMDGPU_RING_TYPE_COMPUTE);
1592 /* reserve GDS, GWS and OA resource for gfx */
1593 r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1595 AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1596 NULL, &adev->gds.gds_gfx_bo);
1600 r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1602 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1603 NULL, &adev->gds.gws_gfx_bo);
1607 r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1609 AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1610 NULL, &adev->gds.oa_gfx_bo);
/* constant-engine RAM size used by the CP */
1614 adev->gfx.ce_ram_size = 0x8000;
1616 gfx_v8_0_gpu_early_init(adev);
/*
 * gfx_v8_0_sw_fini - undo gfx_v8_0_sw_init in reverse order.
 *
 * Drops the GDS/GWS/OA buffer objects, tears down every gfx and compute
 * ring, then frees the MEC HPD buffer via gfx_v8_0_mec_fini().
 */
1621 static int gfx_v8_0_sw_fini(void *handle)
1624 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1626 amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1627 amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1628 amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1630 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1631 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1632 for (i = 0; i < adev->gfx.num_compute_rings; i++)
1633 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1635 gfx_v8_0_mec_fini(adev);
1640 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1642 uint32_t *modearray, *mod2array;
1643 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1644 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1647 modearray = adev->gfx.config.tile_mode_array;
1648 mod2array = adev->gfx.config.macrotile_mode_array;
1650 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1651 modearray[reg_offset] = 0;
1653 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1654 mod2array[reg_offset] = 0;
1656 switch (adev->asic_type) {
1658 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1659 PIPE_CONFIG(ADDR_SURF_P2) |
1660 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1661 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1662 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1663 PIPE_CONFIG(ADDR_SURF_P2) |
1664 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1665 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1666 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1667 PIPE_CONFIG(ADDR_SURF_P2) |
1668 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1669 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1670 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1671 PIPE_CONFIG(ADDR_SURF_P2) |
1672 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1673 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1674 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1675 PIPE_CONFIG(ADDR_SURF_P2) |
1676 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1677 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1678 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1679 PIPE_CONFIG(ADDR_SURF_P2) |
1680 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1681 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1682 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1683 PIPE_CONFIG(ADDR_SURF_P2) |
1684 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1685 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1686 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1687 PIPE_CONFIG(ADDR_SURF_P2));
1688 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1689 PIPE_CONFIG(ADDR_SURF_P2) |
1690 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1691 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1692 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1693 PIPE_CONFIG(ADDR_SURF_P2) |
1694 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1695 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1696 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1697 PIPE_CONFIG(ADDR_SURF_P2) |
1698 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1699 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1700 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1701 PIPE_CONFIG(ADDR_SURF_P2) |
1702 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1703 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1704 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1705 PIPE_CONFIG(ADDR_SURF_P2) |
1706 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1707 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1708 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1709 PIPE_CONFIG(ADDR_SURF_P2) |
1710 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1711 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1712 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1713 PIPE_CONFIG(ADDR_SURF_P2) |
1714 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1715 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1716 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1717 PIPE_CONFIG(ADDR_SURF_P2) |
1718 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1719 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1720 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1721 PIPE_CONFIG(ADDR_SURF_P2) |
1722 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1723 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1724 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1725 PIPE_CONFIG(ADDR_SURF_P2) |
1726 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1727 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1728 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1729 PIPE_CONFIG(ADDR_SURF_P2) |
1730 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1731 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1732 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1733 PIPE_CONFIG(ADDR_SURF_P2) |
1734 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1735 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1736 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1737 PIPE_CONFIG(ADDR_SURF_P2) |
1738 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1739 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1740 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1741 PIPE_CONFIG(ADDR_SURF_P2) |
1742 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1743 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1744 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1745 PIPE_CONFIG(ADDR_SURF_P2) |
1746 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1747 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1748 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1749 PIPE_CONFIG(ADDR_SURF_P2) |
1750 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1751 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1752 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1753 PIPE_CONFIG(ADDR_SURF_P2) |
1754 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1755 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1756 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1757 PIPE_CONFIG(ADDR_SURF_P2) |
1758 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1759 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1761 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1762 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1763 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1764 NUM_BANKS(ADDR_SURF_8_BANK));
1765 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1766 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1767 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1768 NUM_BANKS(ADDR_SURF_8_BANK));
1769 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1770 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1771 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1772 NUM_BANKS(ADDR_SURF_8_BANK));
1773 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1774 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1775 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1776 NUM_BANKS(ADDR_SURF_8_BANK));
1777 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1778 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1779 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1780 NUM_BANKS(ADDR_SURF_8_BANK));
1781 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1782 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1783 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1784 NUM_BANKS(ADDR_SURF_8_BANK));
1785 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1786 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1787 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1788 NUM_BANKS(ADDR_SURF_8_BANK));
1789 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1790 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1791 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1792 NUM_BANKS(ADDR_SURF_16_BANK));
1793 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1794 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1795 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1796 NUM_BANKS(ADDR_SURF_16_BANK));
1797 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1798 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1799 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1800 NUM_BANKS(ADDR_SURF_16_BANK));
1801 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1802 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1803 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1804 NUM_BANKS(ADDR_SURF_16_BANK));
1805 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1806 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1807 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1808 NUM_BANKS(ADDR_SURF_16_BANK));
1809 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1810 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1811 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1812 NUM_BANKS(ADDR_SURF_16_BANK));
1813 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1814 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1815 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1816 NUM_BANKS(ADDR_SURF_8_BANK));
1818 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1819 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1821 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1823 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1824 if (reg_offset != 7)
1825 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1829 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1830 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1831 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1832 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1833 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1834 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1835 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1836 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1837 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1838 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1839 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1840 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1841 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1842 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1843 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1844 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1845 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1846 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1847 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1848 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1849 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1850 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1851 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1852 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1853 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1854 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1855 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1856 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1857 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1858 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1859 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1860 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1861 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1862 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1863 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1864 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1865 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1866 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1867 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1868 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1869 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1870 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1871 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1872 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1873 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1875 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1876 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1877 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1878 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1879 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1880 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1881 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1882 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1883 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1884 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1885 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1886 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1887 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1888 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1889 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1890 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1891 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1892 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1893 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1894 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1895 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1896 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1897 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1898 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1899 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1900 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1901 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1902 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1903 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1904 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1905 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1906 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1907 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1908 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1909 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1910 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1911 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1912 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1913 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1914 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1915 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1916 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1917 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1918 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1919 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1920 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1921 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1922 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1923 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1924 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1925 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1926 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1927 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1928 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1929 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1930 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1931 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1932 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1933 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1934 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1935 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1936 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1937 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1938 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1939 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1940 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1941 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1942 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1943 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1944 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1945 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1946 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1947 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1948 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1949 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1950 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1952 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1953 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1954 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1955 NUM_BANKS(ADDR_SURF_8_BANK));
1956 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1957 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1958 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1959 NUM_BANKS(ADDR_SURF_8_BANK));
1960 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1961 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1962 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1963 NUM_BANKS(ADDR_SURF_8_BANK));
1964 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1965 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1966 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1967 NUM_BANKS(ADDR_SURF_8_BANK));
1968 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1969 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1970 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1971 NUM_BANKS(ADDR_SURF_8_BANK));
1972 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1973 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1974 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1975 NUM_BANKS(ADDR_SURF_8_BANK));
1976 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1977 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1978 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1979 NUM_BANKS(ADDR_SURF_8_BANK));
1980 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1981 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1982 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1983 NUM_BANKS(ADDR_SURF_8_BANK));
1984 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1985 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1986 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1987 NUM_BANKS(ADDR_SURF_8_BANK));
1988 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1989 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1990 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1991 NUM_BANKS(ADDR_SURF_8_BANK));
1992 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1993 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1994 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1995 NUM_BANKS(ADDR_SURF_8_BANK));
1996 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1997 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1998 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1999 NUM_BANKS(ADDR_SURF_8_BANK));
2000 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2001 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2002 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2003 NUM_BANKS(ADDR_SURF_8_BANK));
2004 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2005 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2006 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2007 NUM_BANKS(ADDR_SURF_4_BANK));
2009 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2010 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2012 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2013 if (reg_offset != 7)
2014 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2018 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2019 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2020 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2021 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2022 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2023 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2024 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2025 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2026 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2027 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2028 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2029 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2030 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2031 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2032 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2033 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2034 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2035 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2036 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2037 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2038 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2039 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2040 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2041 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2042 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2043 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2044 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2045 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2046 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2047 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2048 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2049 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2050 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2051 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2052 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2053 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2054 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2055 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2056 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2057 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2058 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2059 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2060 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2061 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2062 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2063 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2064 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2065 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2066 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2067 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2068 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2069 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2070 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2071 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2072 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2073 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2074 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2075 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2076 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2077 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2078 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2079 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2081 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2082 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2083 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2084 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2085 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2086 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2087 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2088 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2089 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2090 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2091 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2092 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2093 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2094 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2095 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2096 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2097 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2098 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2099 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2100 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2101 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2102 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2103 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2104 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2105 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2106 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2107 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2108 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2109 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2110 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2111 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2112 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2113 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2114 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2115 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2116 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2117 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2118 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2119 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2120 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2121 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2122 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2123 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2124 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2125 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2126 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2127 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2128 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2129 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2130 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2131 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2132 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2133 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2134 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2135 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2136 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2137 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2138 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2139 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2141 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2142 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2143 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2144 NUM_BANKS(ADDR_SURF_16_BANK));
2145 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2146 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2147 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2148 NUM_BANKS(ADDR_SURF_16_BANK));
2149 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2150 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2151 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2152 NUM_BANKS(ADDR_SURF_16_BANK));
2153 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2154 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2155 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2156 NUM_BANKS(ADDR_SURF_16_BANK));
2157 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2158 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2159 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2160 NUM_BANKS(ADDR_SURF_16_BANK));
2161 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2163 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2164 NUM_BANKS(ADDR_SURF_16_BANK));
2165 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2166 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2167 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2168 NUM_BANKS(ADDR_SURF_16_BANK));
2169 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2170 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2171 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2172 NUM_BANKS(ADDR_SURF_16_BANK));
2173 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2175 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2176 NUM_BANKS(ADDR_SURF_16_BANK));
2177 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2178 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2179 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2180 NUM_BANKS(ADDR_SURF_16_BANK));
2181 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2182 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2183 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2184 NUM_BANKS(ADDR_SURF_16_BANK));
2185 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2188 NUM_BANKS(ADDR_SURF_8_BANK));
2189 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2190 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2191 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2192 NUM_BANKS(ADDR_SURF_4_BANK));
2193 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2194 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2195 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2196 NUM_BANKS(ADDR_SURF_4_BANK));
2198 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2199 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2201 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2202 if (reg_offset != 7)
2203 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2207 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2208 PIPE_CONFIG(ADDR_SURF_P2) |
2209 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2210 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2211 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2212 PIPE_CONFIG(ADDR_SURF_P2) |
2213 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2214 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2215 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2216 PIPE_CONFIG(ADDR_SURF_P2) |
2217 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2218 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2219 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2220 PIPE_CONFIG(ADDR_SURF_P2) |
2221 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2222 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2223 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224 PIPE_CONFIG(ADDR_SURF_P2) |
2225 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2226 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2227 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2228 PIPE_CONFIG(ADDR_SURF_P2) |
2229 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2230 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2231 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2232 PIPE_CONFIG(ADDR_SURF_P2) |
2233 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2234 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2235 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2236 PIPE_CONFIG(ADDR_SURF_P2));
2237 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2238 PIPE_CONFIG(ADDR_SURF_P2) |
2239 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2240 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2241 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2242 PIPE_CONFIG(ADDR_SURF_P2) |
2243 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2244 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2245 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2246 PIPE_CONFIG(ADDR_SURF_P2) |
2247 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2248 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2249 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2250 PIPE_CONFIG(ADDR_SURF_P2) |
2251 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2253 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2254 PIPE_CONFIG(ADDR_SURF_P2) |
2255 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2256 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2257 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2258 PIPE_CONFIG(ADDR_SURF_P2) |
2259 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2260 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2261 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2262 PIPE_CONFIG(ADDR_SURF_P2) |
2263 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2264 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2265 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2266 PIPE_CONFIG(ADDR_SURF_P2) |
2267 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2268 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2269 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2270 PIPE_CONFIG(ADDR_SURF_P2) |
2271 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2272 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2273 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2274 PIPE_CONFIG(ADDR_SURF_P2) |
2275 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2276 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2277 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2278 PIPE_CONFIG(ADDR_SURF_P2) |
2279 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2280 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2281 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2282 PIPE_CONFIG(ADDR_SURF_P2) |
2283 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2284 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2285 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2286 PIPE_CONFIG(ADDR_SURF_P2) |
2287 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2288 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2289 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2290 PIPE_CONFIG(ADDR_SURF_P2) |
2291 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2292 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2293 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2294 PIPE_CONFIG(ADDR_SURF_P2) |
2295 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2296 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2297 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2298 PIPE_CONFIG(ADDR_SURF_P2) |
2299 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2301 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2302 PIPE_CONFIG(ADDR_SURF_P2) |
2303 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2304 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2305 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2306 PIPE_CONFIG(ADDR_SURF_P2) |
2307 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2308 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2310 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2311 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2312 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2313 NUM_BANKS(ADDR_SURF_8_BANK));
2314 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2315 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2316 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2317 NUM_BANKS(ADDR_SURF_8_BANK));
2318 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2319 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2320 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2321 NUM_BANKS(ADDR_SURF_8_BANK));
2322 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2323 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2324 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2325 NUM_BANKS(ADDR_SURF_8_BANK));
2326 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2327 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2328 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2329 NUM_BANKS(ADDR_SURF_8_BANK));
2330 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2331 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2332 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2333 NUM_BANKS(ADDR_SURF_8_BANK));
2334 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2336 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2337 NUM_BANKS(ADDR_SURF_8_BANK));
2338 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2339 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2340 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2341 NUM_BANKS(ADDR_SURF_16_BANK));
2342 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2343 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2344 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2345 NUM_BANKS(ADDR_SURF_16_BANK));
2346 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2347 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2348 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2349 NUM_BANKS(ADDR_SURF_16_BANK));
2350 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2351 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2352 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2353 NUM_BANKS(ADDR_SURF_16_BANK));
2354 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2356 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2357 NUM_BANKS(ADDR_SURF_16_BANK));
2358 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2359 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2360 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2361 NUM_BANKS(ADDR_SURF_16_BANK));
2362 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2363 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2364 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2365 NUM_BANKS(ADDR_SURF_8_BANK));
2367 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2368 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2370 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2372 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2373 if (reg_offset != 7)
2374 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2379 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2383 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2384 PIPE_CONFIG(ADDR_SURF_P2) |
2385 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2386 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2387 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2388 PIPE_CONFIG(ADDR_SURF_P2) |
2389 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2390 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2391 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2392 PIPE_CONFIG(ADDR_SURF_P2) |
2393 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2394 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2395 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2396 PIPE_CONFIG(ADDR_SURF_P2) |
2397 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2398 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2399 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400 PIPE_CONFIG(ADDR_SURF_P2) |
2401 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2402 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2403 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404 PIPE_CONFIG(ADDR_SURF_P2) |
2405 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2406 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2408 PIPE_CONFIG(ADDR_SURF_P2) |
2409 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2410 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2411 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2412 PIPE_CONFIG(ADDR_SURF_P2));
2413 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414 PIPE_CONFIG(ADDR_SURF_P2) |
2415 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2416 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418 PIPE_CONFIG(ADDR_SURF_P2) |
2419 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2420 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422 PIPE_CONFIG(ADDR_SURF_P2) |
2423 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2424 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2425 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426 PIPE_CONFIG(ADDR_SURF_P2) |
2427 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2428 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430 PIPE_CONFIG(ADDR_SURF_P2) |
2431 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2432 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2434 PIPE_CONFIG(ADDR_SURF_P2) |
2435 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2436 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2438 PIPE_CONFIG(ADDR_SURF_P2) |
2439 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2440 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2442 PIPE_CONFIG(ADDR_SURF_P2) |
2443 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2444 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2445 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2446 PIPE_CONFIG(ADDR_SURF_P2) |
2447 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2449 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2450 PIPE_CONFIG(ADDR_SURF_P2) |
2451 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2452 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2453 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2454 PIPE_CONFIG(ADDR_SURF_P2) |
2455 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2456 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2457 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2458 PIPE_CONFIG(ADDR_SURF_P2) |
2459 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2461 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2462 PIPE_CONFIG(ADDR_SURF_P2) |
2463 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2464 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2465 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2466 PIPE_CONFIG(ADDR_SURF_P2) |
2467 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2468 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2469 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2470 PIPE_CONFIG(ADDR_SURF_P2) |
2471 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2472 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2473 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2474 PIPE_CONFIG(ADDR_SURF_P2) |
2475 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2476 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2477 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2478 PIPE_CONFIG(ADDR_SURF_P2) |
2479 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2480 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2481 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2482 PIPE_CONFIG(ADDR_SURF_P2) |
2483 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2484 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2486 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2488 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2489 NUM_BANKS(ADDR_SURF_8_BANK));
2490 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2493 NUM_BANKS(ADDR_SURF_8_BANK));
2494 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2497 NUM_BANKS(ADDR_SURF_8_BANK));
2498 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2501 NUM_BANKS(ADDR_SURF_8_BANK));
2502 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2504 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2505 NUM_BANKS(ADDR_SURF_8_BANK));
2506 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2508 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2509 NUM_BANKS(ADDR_SURF_8_BANK));
2510 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2513 NUM_BANKS(ADDR_SURF_8_BANK));
2514 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2515 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2516 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2517 NUM_BANKS(ADDR_SURF_16_BANK));
2518 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2519 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2520 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2521 NUM_BANKS(ADDR_SURF_16_BANK));
2522 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2523 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2524 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2525 NUM_BANKS(ADDR_SURF_16_BANK));
2526 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2529 NUM_BANKS(ADDR_SURF_16_BANK));
2530 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2532 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2533 NUM_BANKS(ADDR_SURF_16_BANK));
2534 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2536 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2537 NUM_BANKS(ADDR_SURF_16_BANK));
2538 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2541 NUM_BANKS(ADDR_SURF_8_BANK));
2543 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2544 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2546 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2548 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2549 if (reg_offset != 7)
2550 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2556 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2558 return (u32)((1ULL << bit_width) - 1);
/*
 * gfx_v8_0_select_se_sh - steer subsequent register accesses via GRBM_GFX_INDEX
 *
 * se_num/sh_num select a specific shader engine / shader array; the magic
 * value 0xffffffff requests broadcast at that level instead.  Instance-level
 * broadcast is always enabled.  Callers in this file hold
 * adev->grbm_idx_mutex around select/restore pairs.
 */
2561 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2563 u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
/* broadcast to every SE and every SH */
2565 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2566 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2567 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
/* specific SH, broadcast across SEs */
2568 } else if (se_num == 0xffffffff) {
2569 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2570 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
/* specific SE, broadcast across SHs */
2571 } else if (sh_num == 0xffffffff) {
2572 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2573 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
/* fully specific SE/SH pair (else branch; brace elided in this extraction) */
2575 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2576 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2578 WREG32(mmGRBM_GFX_INDEX, data);
/*
 * gfx_v8_0_get_rb_disabled - read the disabled render-backend bitmap for the
 * currently selected SE/SH (caller must have programmed GRBM_GFX_INDEX first).
 *
 * Combines the fuse-level CC_RB_BACKEND_DISABLE bits with the driver-level
 * GC_USER_RB_BACKEND_DISABLE bits, then masks to the number of RBs per SH.
 */
2581 static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
2582 u32 max_rb_num_per_se,
/* hardware-fused disabled RBs */
2587 data = RREG32(mmCC_RB_BACKEND_DISABLE);
2588 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
/* OR in software/user-disabled RBs */
2590 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
2592 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
/* restrict to the RBs that belong to one SH */
2594 mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);
/*
 * gfx_v8_0_setup_rb - discover the enabled render backends and program
 * PA_SC_RASTER_CONFIG accordingly.
 *
 * Phase 1: walk every SE/SH, collect the per-SH disabled-RB bits into one
 * global bitmap.  Phase 2: invert that into an enabled-RB mask and cache it
 * in adev->gfx.config.backend_enable_mask.  Phase 3: per SE, rewrite the
 * raster config RB mapping based on which RBs survived.
 */
2599 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
2600 u32 se_num, u32 sh_per_se,
2601 u32 max_rb_num_per_se)
2605 u32 disabled_rbs = 0;
2606 u32 enabled_rbs = 0;
/* phase 1: gather disabled-RB bits from every SE/SH */
2608 mutex_lock(&adev->grbm_idx_mutex);
2609 for (i = 0; i < se_num; i++) {
2610 for (j = 0; j < sh_per_se; j++) {
2611 gfx_v8_0_select_se_sh(adev, i, j);
2612 data = gfx_v8_0_get_rb_disabled(adev,
2613 max_rb_num_per_se, sh_per_se);
2614 disabled_rbs |= data << ((i * sh_per_se + j) *
2615 RB_BITMAP_WIDTH_PER_SH);
/* restore broadcast before releasing the index mutex */
2618 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2619 mutex_unlock(&adev->grbm_idx_mutex);
/* phase 2: build the enabled mask (mask init/shift lines elided here) */
2622 for (i = 0; i < max_rb_num_per_se * se_num; i++) {
2623 if (!(disabled_rbs & mask))
2624 enabled_rbs |= mask;
2628 adev->gfx.config.backend_enable_mask = enabled_rbs;
/* phase 3: program the raster config RB map per SE */
2630 mutex_lock(&adev->grbm_idx_mutex);
2631 for (i = 0; i < se_num; i++) {
2632 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
2633 data = RREG32(mmPA_SC_RASTER_CONFIG);
2634 for (j = 0; j < sh_per_se; j++) {
/* NOTE(review): case labels and the enabled_rbs consume/shift lines
 * appear elided in this extraction; two enabled-RB bits are examined
 * per SH iteration. */
2635 switch (enabled_rbs & 3) {
2638 data |= (RASTER_CONFIG_RB_MAP_3 <<
2639 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
2641 data |= (RASTER_CONFIG_RB_MAP_0 <<
2642 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
2645 data |= (RASTER_CONFIG_RB_MAP_0 <<
2646 (i * sh_per_se + j) * 2);
2649 data |= (RASTER_CONFIG_RB_MAP_3 <<
2650 (i * sh_per_se + j) * 2);
2654 data |= (RASTER_CONFIG_RB_MAP_2 <<
2655 (i * sh_per_se + j) * 2);
2660 WREG32(mmPA_SC_RASTER_CONFIG, data);
2662 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2663 mutex_unlock(&adev->grbm_idx_mutex);
2667 * gfx_v8_0_init_compute_vmid - initialize SH_MEM registers for the compute VMIDs
2669 * @adev: amdgpu_device pointer
2671 * Initialize compute vmid sh_mem registers
/* Aperture base encoding written into both halves of SH_MEM_BASES. */
2674 #define DEFAULT_SH_MEM_BASES (0x6000)
/* Compute/HSA owns VMIDs 8..15; the loop below is [FIRST, LAST). */
2675 #define FIRST_COMPUTE_VMID (8)
2676 #define LAST_COMPUTE_VMID (16)
/*
 * Program identical SH_MEM aperture/config state for every compute VMID.
 * Uses HSA64 addressing with unaligned access allowed; serialized against
 * other SRBM bank switches via adev->srbm_mutex.
 */
2677 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
2680 uint32_t sh_mem_config;
2681 uint32_t sh_mem_bases;
2684 * Configure apertures:
2685 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
2686 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
2687 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
2689 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2691 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
2692 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
2693 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2694 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
2695 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
2696 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
2698 mutex_lock(&adev->srbm_mutex);
2699 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2700 vi_srbm_select(adev, 0, 0, 0, i);
2701 /* CP and shaders */
2702 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
/* APE1 base > limit disables the APE1 aperture for these VMIDs */
2703 WREG32(mmSH_MEM_APE1_BASE, 1);
2704 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2705 WREG32(mmSH_MEM_BASES, sh_mem_bases);
/* back to VMID 0 before dropping the SRBM lock */
2707 vi_srbm_select(adev, 0, 0, 0, 0);
2708 mutex_unlock(&adev->srbm_mutex);
/*
 * gfx_v8_0_gpu_init - one-time GFX block bring-up programming.
 *
 * Sets the GRBM read timeout, fans the GB address config out to the blocks
 * that need it (HDP, DMIF, SDMA, UVD), initializes the tiling tables and
 * render backends, programs per-VMID SH_MEM defaults, sets up the compute
 * VMIDs, and finally sizes the PA_SC FIFOs via a broadcast write.
 */
2711 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
2716 tmp = RREG32(mmGRBM_CNTL);
2717 tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
2718 WREG32(mmGRBM_CNTL, tmp);
/* propagate the addressing/tiling config to all consumers */
2720 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2721 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2722 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
2723 WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
2724 adev->gfx.config.gb_addr_config & 0x70)
2725 WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
2726 adev->gfx.config.gb_addr_config & 0x70);
2727 WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2728 WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2729 WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2731 gfx_v8_0_tiling_mode_table_init(adev);
2733 gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
2734 adev->gfx.config.max_sh_per_se,
2735 adev->gfx.config.max_backends_per_se);
2737 /* XXX SH_MEM regs */
2738 /* where to put LDS, scratch, GPUVM in FSA64 space */
2739 mutex_lock(&adev->srbm_mutex);
2740 for (i = 0; i < 16; i++) {
2741 vi_srbm_select(adev, 0, 0, 0, i);
2742 /* CP and shaders */
/* NOTE(review): the conditional selecting the UC config (first block)
 * vs. the NC config (second block) appears elided in this extraction;
 * presumably VMID 0 takes the UC path — confirm against the full file. */
2744 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
2745 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
2746 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2747 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2748 WREG32(mmSH_MEM_CONFIG, tmp);
2750 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
2751 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
2752 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2753 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2754 WREG32(mmSH_MEM_CONFIG, tmp);
/* APE1 base > limit disables the APE1 aperture */
2757 WREG32(mmSH_MEM_APE1_BASE, 1);
2758 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2759 WREG32(mmSH_MEM_BASES, 0);
2761 vi_srbm_select(adev, 0, 0, 0, 0);
2762 mutex_unlock(&adev->srbm_mutex);
2764 gfx_v8_0_init_compute_vmid(adev);
2766 mutex_lock(&adev->grbm_idx_mutex);
2768 * making sure that the following register writes will be broadcasted
2769 * to all the shaders
2771 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2773 WREG32(mmPA_SC_FIFO_SIZE,
2774 (adev->gfx.config.sc_prim_fifo_size_frontend <<
2775 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2776 (adev->gfx.config.sc_prim_fifo_size_backend <<
2777 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2778 (adev->gfx.config.sc_hiz_tile_fifo_size <<
2779 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2780 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
2781 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
2782 mutex_unlock(&adev->grbm_idx_mutex);
/*
 * gfx_v8_0_wait_for_rlc_serdes - busy-wait until the RLC serdes links go idle.
 *
 * First polls RLC_SERDES_CU_MASTER_BUSY for every SE/SH pair (each poll loop
 * bounded by adev->usec_timeout), then polls the non-CU masters (SE, GC,
 * TC0/TC1) through RLC_SERDES_NONCU_MASTER_BUSY.
 */
2786 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2791 mutex_lock(&adev->grbm_idx_mutex);
2792 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2793 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2794 gfx_v8_0_select_se_sh(adev, i, j);
/* bounded poll; break/delay lines elided in this extraction */
2795 for (k = 0; k < adev->usec_timeout; k++) {
2796 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2802 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2803 mutex_unlock(&adev->grbm_idx_mutex);
/* now wait for the non-CU serdes masters as well */
2805 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2806 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2807 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2808 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2809 for (k = 0; k < adev->usec_timeout; k++) {
2810 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/*
 * gfx_v8_0_enable_gui_idle_interrupt - gate the four GUI idle/busy interrupt
 * sources in CP_INT_CNTL_RING0 on or off together.
 * (The bool enable parameter line is elided in this extraction.)
 */
2816 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2819 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2821 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2822 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2823 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2824 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2826 WREG32(mmCP_INT_CNTL_RING0, tmp);
/*
 * gfx_v8_0_rlc_stop - halt the RLC microengine.
 *
 * Clears RLC_ENABLE_F32, masks the GUI idle interrupts, then waits for the
 * RLC serdes links to drain before returning.
 */
2829 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
2831 u32 tmp = RREG32(mmRLC_CNTL);
2833 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2834 WREG32(mmRLC_CNTL, tmp);
2836 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
2838 gfx_v8_0_wait_for_rlc_serdes(adev);
/*
 * gfx_v8_0_rlc_reset - pulse the RLC soft-reset bit in GRBM_SOFT_RESET.
 * Asserts SOFT_RESET_RLC, then deasserts it (any settle delay between the
 * two writes is elided in this extraction).
 */
2841 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
2843 u32 tmp = RREG32(mmGRBM_SOFT_RESET);
2845 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2846 WREG32(mmGRBM_SOFT_RESET, tmp);
2848 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2849 WREG32(mmGRBM_SOFT_RESET, tmp);
/*
 * gfx_v8_0_rlc_start - re-enable the RLC microengine (RLC_ENABLE_F32 = 1).
 * On dGPUs the GUI idle interrupts are re-armed here; APUs (e.g. Carrizo)
 * defer that until after CP init, per the comment below.
 */
2853 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
2855 u32 tmp = RREG32(mmRLC_CNTL);
2857 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
2858 WREG32(mmRLC_CNTL, tmp);
2860 /* carrizo do enable cp interrupt after cp inited */
2861 if (!(adev->flags & AMD_IS_APU))
2862 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/*
 * gfx_v8_0_rlc_load_microcode - legacy (non-SMU) RLC firmware upload.
 *
 * Streams the ucode words from the request_firmware image into
 * RLC_GPM_UCODE_DATA via the auto-incrementing RLC_GPM_UCODE_ADDR port,
 * then leaves the firmware version in the address register.
 * Fails (error-return line elided here) if no RLC firmware was loaded.
 */
2867 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
2869 const struct rlc_firmware_header_v2_0 *hdr;
2870 const __le32 *fw_data;
2871 unsigned i, fw_size;
2873 if (!adev->gfx.rlc_fw)
2876 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2877 amdgpu_ucode_print_rlc_hdr(&hdr->header);
/* ucode payload starts at the header-declared byte offset; size in dwords */
2879 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2880 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2881 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2883 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
2884 for (i = 0; i < fw_size; i++)
2885 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2886 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
/*
 * gfx_v8_0_rlc_resume - full RLC restart sequence.
 *
 * Stops the RLC, disables coarse/medium clock gating (CGCG/CGLS) and power
 * gating, soft-resets the RLC, then loads firmware: either the legacy
 * MMIO path or, with an SMU manager, just verifies the SMU finished the
 * RLC_G upload.  Finally restarts the RLC.
 */
2891 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
2895 gfx_v8_0_rlc_stop(adev);
/* disable CGCG/CGLS */
2898 WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
/* disable power gating */
2901 WREG32(mmRLC_PG_CNTL, 0);
2903 gfx_v8_0_rlc_reset(adev);
2905 if (!adev->pp_enabled) {
2906 if (!adev->firmware.smu_load) {
2907 /* legacy rlc firmware loading */
2908 r = gfx_v8_0_rlc_load_microcode(adev);
/* else branch: SMU-managed load — confirm it completed */
2912 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
2913 AMDGPU_UCODE_ID_RLC_G);
2919 gfx_v8_0_rlc_start(adev);
/*
 * gfx_v8_0_cp_gfx_enable - unhalt (enable=true) or halt (enable=false) the
 * three gfx CP micro engines (ME, PFP, CE) via CP_ME_CNTL.
 * When halting, every gfx ring is also marked not ready so no further
 * submissions are attempted.
 */
2924 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2927 u32 tmp = RREG32(mmCP_ME_CNTL);
/* enable path: clear all three HALT bits */
2930 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2931 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2932 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
/* disable path: assert all three HALT bits and park the rings */
2934 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2935 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2936 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2937 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2938 adev->gfx.gfx_ring[i].ready = false;
2940 WREG32(mmCP_ME_CNTL, tmp);
/*
 * gfx_v8_0_cp_gfx_load_microcode - upload the PFP, CE and ME firmware images
 * to the gfx CP.
 *
 * Requires all three firmware blobs to be present (error-return line elided
 * here).  The CP is halted first, then each image is streamed through its
 * auto-incrementing data port; the firmware version is left in the address
 * register after each upload.
 */
2944 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2946 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2947 const struct gfx_firmware_header_v1_0 *ce_hdr;
2948 const struct gfx_firmware_header_v1_0 *me_hdr;
2949 const __le32 *fw_data;
2950 unsigned i, fw_size;
2952 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2955 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2956 adev->gfx.pfp_fw->data;
2957 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2958 adev->gfx.ce_fw->data;
2959 me_hdr = (const struct gfx_firmware_header_v1_0 *)
2960 adev->gfx.me_fw->data;
2962 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2963 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2964 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
/* halt the CP before touching its ucode RAM */
2966 gfx_v8_0_cp_gfx_enable(adev, false);
/* PFP */
2969 fw_data = (const __le32 *)
2970 (adev->gfx.pfp_fw->data +
2971 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2972 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2973 WREG32(mmCP_PFP_UCODE_ADDR, 0);
2974 for (i = 0; i < fw_size; i++)
2975 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2976 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
/* CE */
2979 fw_data = (const __le32 *)
2980 (adev->gfx.ce_fw->data +
2981 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2982 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2983 WREG32(mmCP_CE_UCODE_ADDR, 0);
2984 for (i = 0; i < fw_size; i++)
2985 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2986 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
/* ME (uses the RAM_WADDR/RAM_DATA port) */
2989 fw_data = (const __le32 *)
2990 (adev->gfx.me_fw->data +
2991 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2992 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2993 WREG32(mmCP_ME_RAM_WADDR, 0);
2994 for (i = 0; i < fw_size; i++)
2995 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2996 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
/*
 * gfx_v8_0_get_csb_size - compute the clear-state buffer size in dwords.
 *
 * Tallies: preamble begin, context control, one SET_CONTEXT_REG packet
 * (2 header dwords + payload) per SECT_CONTEXT extent of vi_cs_data, the
 * raster config pair, preamble end and clear state.  The fixed-size
 * increments and the return of the accumulated count are elided in this
 * extraction.
 */
3001 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3004 const struct cs_section_def *sect = NULL;
3005 const struct cs_extent_def *ext = NULL;
3007 /* begin clear state */
3009 /* context control state */
3012 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3013 for (ext = sect->section; ext->extent != NULL; ++ext) {
3014 if (sect->id == SECT_CONTEXT)
3015 count += 2 + ext->reg_count;
3020 /* pa_sc_raster_config/pa_sc_raster_config1 */
3022 /* end clear state */
/*
 * gfx_v8_0_cp_gfx_start - initialize the gfx CP and emit the clear-state /
 * CE-partition preamble on gfx ring 0.
 *
 * Programs basic CP registers, unhalts the CP, then writes the clear-state
 * sequence (context control, every SECT_CONTEXT extent of vi_cs_data, the
 * per-ASIC raster config pair) followed by CLEAR_STATE and SET_BASE for the
 * CE partitions.
 */
3030 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3032 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3033 const struct cs_section_def *sect = NULL;
3034 const struct cs_extent_def *ext = NULL;
3038 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3039 WREG32(mmCP_ENDIAN_SWAP, 0);
3040 WREG32(mmCP_DEVICE_ID, 1);
3042 gfx_v8_0_cp_gfx_enable(adev, true);
3044 r = amdgpu_ring_lock(ring, gfx_v8_0_get_csb_size(adev) + 4);
3046 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3050 /* clear state buffer */
3051 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3052 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3054 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3055 amdgpu_ring_write(ring, 0x80000000);
3056 amdgpu_ring_write(ring, 0x80000000);
/* emit every SECT_CONTEXT register extent from the golden cs data */
3058 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3059 for (ext = sect->section; ext->extent != NULL; ++ext) {
3060 if (sect->id == SECT_CONTEXT) {
3061 amdgpu_ring_write(ring,
3062 PACKET3(PACKET3_SET_CONTEXT_REG,
3064 amdgpu_ring_write(ring,
3065 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3066 for (i = 0; i < ext->reg_count; i++)
3067 amdgpu_ring_write(ring, ext->extent[i]);
/* per-ASIC PA_SC_RASTER_CONFIG / _CONFIG1 values
 * (case labels elided in this extraction) */
3072 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3073 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3074 switch (adev->asic_type) {
3076 amdgpu_ring_write(ring, 0x16000012);
3077 amdgpu_ring_write(ring, 0x0000002A);
3080 amdgpu_ring_write(ring, 0x3a00161a);
3081 amdgpu_ring_write(ring, 0x0000002e);
3085 amdgpu_ring_write(ring, 0x00000002);
3086 amdgpu_ring_write(ring, 0x00000000);
3089 amdgpu_ring_write(ring, 0x00000000);
3090 amdgpu_ring_write(ring, 0x00000000);
3096 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3097 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3099 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3100 amdgpu_ring_write(ring, 0);
3102 /* init the CE partitions */
3103 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3104 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3105 amdgpu_ring_write(ring, 0x8000);
3106 amdgpu_ring_write(ring, 0x8000);
3108 amdgpu_ring_unlock_commit(ring);
/*
 * gfx_v8_0_cp_gfx_resume - program gfx ring buffer 0 and start the CP.
 *
 * Sets up CP_RB0 size/pointers/base and the rptr writeback address,
 * configures doorbells (not on Topaz, which has none; Tonga also needs the
 * doorbell range registers), then runs the clear-state start sequence and
 * ring-tests the ring.
 */
3113 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
3115 struct amdgpu_ring *ring;
3118 u64 rb_addr, rptr_addr;
3121 /* Set the write pointer delay */
3122 WREG32(mmCP_RB_WPTR_DELAY, 0);
3124 /* set the RB to use vmid 0 */
3125 WREG32(mmCP_RB_VMID, 0);
3127 /* Set ring buffer size */
3128 ring = &adev->gfx.gfx_ring[0];
/* RB_BUFSZ is log2 of the size in dwords (ring_size is in bytes) */
3129 rb_bufsz = order_base_2(ring->ring_size / 8);
3130 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3131 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3132 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
3133 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
/* big-endian swap (inside an #ifdef in the full file) */
3135 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3137 WREG32(mmCP_RB0_CNTL, tmp);
3139 /* Initialize the ring buffer's read and write pointers */
3140 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
3142 WREG32(mmCP_RB0_WPTR, ring->wptr);
3144 /* set the wb address wether it's enabled or not */
3145 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3146 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
/* the HI register only holds the low 8 bits of the upper dword */
3147 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
/* drop RPTR_WR_ENA again after priming the pointers */
3150 WREG32(mmCP_RB0_CNTL, tmp);
/* ring base is programmed in units of 256 bytes */
3152 rb_addr = ring->gpu_addr >> 8;
3153 WREG32(mmCP_RB0_BASE, rb_addr);
3154 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3156 /* no gfx doorbells on iceland */
3157 if (adev->asic_type != CHIP_TOPAZ) {
3158 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
3159 if (ring->use_doorbell) {
3160 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3161 DOORBELL_OFFSET, ring->doorbell_index);
3162 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
/* else: doorbell disabled (field value lines elided in this extraction) */
3165 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3168 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
3170 if (adev->asic_type == CHIP_TONGA) {
3171 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3172 DOORBELL_RANGE_LOWER,
3173 AMDGPU_DOORBELL_GFX_RING0);
3174 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3176 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
3177 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3182 /* start the ring */
3183 gfx_v8_0_cp_gfx_start(adev);
3185 r = amdgpu_ring_test_ring(ring);
3187 ring->ready = false;
/*
 * gfx_v8_0_cp_compute_enable - unhalt (enable=true) or halt (enable=false)
 * both MEC micro engines via CP_MEC_CNTL.  When halting, every compute ring
 * is also marked not ready.
 */
3194 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
/* enable path: clear all halt bits */
3199 WREG32(mmCP_MEC_CNTL, 0);
/* disable path: halt ME1 and ME2 and park the compute rings */
3201 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3202 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3203 adev->gfx.compute_ring[i].ready = false;
/* Thin wrapper: unhalt the MEC engines (return statement elided here). */
3208 static int gfx_v8_0_cp_compute_start(struct amdgpu_device *adev)
3210 gfx_v8_0_cp_compute_enable(adev, true);
/*
 * gfx_v8_0_cp_compute_load_microcode - upload the MEC firmware(s).
 *
 * Requires the MEC1 blob (error-return line elided here); halts the MEC,
 * streams the MEC1 image through its auto-incrementing data port, and
 * optionally does the same for a separate MEC2 image.
 */
3215 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3217 const struct gfx_firmware_header_v1_0 *mec_hdr;
3218 const __le32 *fw_data;
3219 unsigned i, fw_size;
3221 if (!adev->gfx.mec_fw)
/* halt the MEC before touching its ucode RAM */
3224 gfx_v8_0_cp_compute_enable(adev, false);
3226 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3227 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3229 fw_data = (const __le32 *)
3230 (adev->gfx.mec_fw->data +
3231 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3232 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
/* MEC1 upload */
3235 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3236 for (i = 0; i < fw_size; i++)
3237 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3238 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3240 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3241 if (adev->gfx.mec2_fw) {
3242 const struct gfx_firmware_header_v1_0 *mec2_hdr;
3244 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3245 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3247 fw_data = (const __le32 *)
3248 (adev->gfx.mec2_fw->data +
3249 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3250 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
/* MEC2 upload */
3252 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3253 for (i = 0; i < fw_size; i++)
3254 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3255 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
3262 uint32_t header; /* ordinal0 */
3263 uint32_t compute_dispatch_initiator; /* ordinal1 */
3264 uint32_t compute_dim_x; /* ordinal2 */
3265 uint32_t compute_dim_y; /* ordinal3 */
3266 uint32_t compute_dim_z; /* ordinal4 */
3267 uint32_t compute_start_x; /* ordinal5 */
3268 uint32_t compute_start_y; /* ordinal6 */
3269 uint32_t compute_start_z; /* ordinal7 */
3270 uint32_t compute_num_thread_x; /* ordinal8 */
3271 uint32_t compute_num_thread_y; /* ordinal9 */
3272 uint32_t compute_num_thread_z; /* ordinal10 */
3273 uint32_t compute_pipelinestat_enable; /* ordinal11 */
3274 uint32_t compute_perfcount_enable; /* ordinal12 */
3275 uint32_t compute_pgm_lo; /* ordinal13 */
3276 uint32_t compute_pgm_hi; /* ordinal14 */
3277 uint32_t compute_tba_lo; /* ordinal15 */
3278 uint32_t compute_tba_hi; /* ordinal16 */
3279 uint32_t compute_tma_lo; /* ordinal17 */
3280 uint32_t compute_tma_hi; /* ordinal18 */
3281 uint32_t compute_pgm_rsrc1; /* ordinal19 */
3282 uint32_t compute_pgm_rsrc2; /* ordinal20 */
3283 uint32_t compute_vmid; /* ordinal21 */
3284 uint32_t compute_resource_limits; /* ordinal22 */
3285 uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */
3286 uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */
3287 uint32_t compute_tmpring_size; /* ordinal25 */
3288 uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */
3289 uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */
3290 uint32_t compute_restart_x; /* ordinal28 */
3291 uint32_t compute_restart_y; /* ordinal29 */
3292 uint32_t compute_restart_z; /* ordinal30 */
3293 uint32_t compute_thread_trace_enable; /* ordinal31 */
3294 uint32_t compute_misc_reserved; /* ordinal32 */
3295 uint32_t compute_dispatch_id; /* ordinal33 */
3296 uint32_t compute_threadgroup_id; /* ordinal34 */
3297 uint32_t compute_relaunch; /* ordinal35 */
3298 uint32_t compute_wave_restore_addr_lo; /* ordinal36 */
3299 uint32_t compute_wave_restore_addr_hi; /* ordinal37 */
3300 uint32_t compute_wave_restore_control; /* ordinal38 */
3301 uint32_t reserved9; /* ordinal39 */
3302 uint32_t reserved10; /* ordinal40 */
3303 uint32_t reserved11; /* ordinal41 */
3304 uint32_t reserved12; /* ordinal42 */
3305 uint32_t reserved13; /* ordinal43 */
3306 uint32_t reserved14; /* ordinal44 */
3307 uint32_t reserved15; /* ordinal45 */
3308 uint32_t reserved16; /* ordinal46 */
3309 uint32_t reserved17; /* ordinal47 */
3310 uint32_t reserved18; /* ordinal48 */
3311 uint32_t reserved19; /* ordinal49 */
3312 uint32_t reserved20; /* ordinal50 */
3313 uint32_t reserved21; /* ordinal51 */
3314 uint32_t reserved22; /* ordinal52 */
3315 uint32_t reserved23; /* ordinal53 */
3316 uint32_t reserved24; /* ordinal54 */
3317 uint32_t reserved25; /* ordinal55 */
3318 uint32_t reserved26; /* ordinal56 */
3319 uint32_t reserved27; /* ordinal57 */
3320 uint32_t reserved28; /* ordinal58 */
3321 uint32_t reserved29; /* ordinal59 */
3322 uint32_t reserved30; /* ordinal60 */
3323 uint32_t reserved31; /* ordinal61 */
3324 uint32_t reserved32; /* ordinal62 */
3325 uint32_t reserved33; /* ordinal63 */
3326 uint32_t reserved34; /* ordinal64 */
3327 uint32_t compute_user_data_0; /* ordinal65 */
3328 uint32_t compute_user_data_1; /* ordinal66 */
3329 uint32_t compute_user_data_2; /* ordinal67 */
3330 uint32_t compute_user_data_3; /* ordinal68 */
3331 uint32_t compute_user_data_4; /* ordinal69 */
3332 uint32_t compute_user_data_5; /* ordinal70 */
3333 uint32_t compute_user_data_6; /* ordinal71 */
3334 uint32_t compute_user_data_7; /* ordinal72 */
3335 uint32_t compute_user_data_8; /* ordinal73 */
3336 uint32_t compute_user_data_9; /* ordinal74 */
3337 uint32_t compute_user_data_10; /* ordinal75 */
3338 uint32_t compute_user_data_11; /* ordinal76 */
3339 uint32_t compute_user_data_12; /* ordinal77 */
3340 uint32_t compute_user_data_13; /* ordinal78 */
3341 uint32_t compute_user_data_14; /* ordinal79 */
3342 uint32_t compute_user_data_15; /* ordinal80 */
3343 uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */
3344 uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */
3345 uint32_t reserved35; /* ordinal83 */
3346 uint32_t reserved36; /* ordinal84 */
3347 uint32_t reserved37; /* ordinal85 */
3348 uint32_t cp_mqd_query_time_lo; /* ordinal86 */
3349 uint32_t cp_mqd_query_time_hi; /* ordinal87 */
3350 uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */
3351 uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */
3352 uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */
3353 uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */
3354 uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */
3355 uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */
3356 uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */
3357 uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */
3358 uint32_t reserved38; /* ordinal96 */
3359 uint32_t reserved39; /* ordinal97 */
3360 uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */
3361 uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */
3362 uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */
3363 uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */
3364 uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */
3365 uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */
3366 uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */
3367 uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */
3368 uint32_t reserved40; /* ordinal106 */
3369 uint32_t reserved41; /* ordinal107 */
3370 uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */
3371 uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */
3372 uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */
3373 uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */
3374 uint32_t reserved42; /* ordinal112 */
3375 uint32_t reserved43; /* ordinal113 */
3376 uint32_t cp_pq_exe_status_lo; /* ordinal114 */
3377 uint32_t cp_pq_exe_status_hi; /* ordinal115 */
3378 uint32_t cp_packet_id_lo; /* ordinal116 */
3379 uint32_t cp_packet_id_hi; /* ordinal117 */
3380 uint32_t cp_packet_exe_status_lo; /* ordinal118 */
3381 uint32_t cp_packet_exe_status_hi; /* ordinal119 */
3382 uint32_t gds_save_base_addr_lo; /* ordinal120 */
3383 uint32_t gds_save_base_addr_hi; /* ordinal121 */
3384 uint32_t gds_save_mask_lo; /* ordinal122 */
3385 uint32_t gds_save_mask_hi; /* ordinal123 */
3386 uint32_t ctx_save_base_addr_lo; /* ordinal124 */
3387 uint32_t ctx_save_base_addr_hi; /* ordinal125 */
3388 uint32_t reserved44; /* ordinal126 */
3389 uint32_t reserved45; /* ordinal127 */
3390 uint32_t cp_mqd_base_addr_lo; /* ordinal128 */
3391 uint32_t cp_mqd_base_addr_hi; /* ordinal129 */
3392 uint32_t cp_hqd_active; /* ordinal130 */
3393 uint32_t cp_hqd_vmid; /* ordinal131 */
3394 uint32_t cp_hqd_persistent_state; /* ordinal132 */
3395 uint32_t cp_hqd_pipe_priority; /* ordinal133 */
3396 uint32_t cp_hqd_queue_priority; /* ordinal134 */
3397 uint32_t cp_hqd_quantum; /* ordinal135 */
3398 uint32_t cp_hqd_pq_base_lo; /* ordinal136 */
3399 uint32_t cp_hqd_pq_base_hi; /* ordinal137 */
3400 uint32_t cp_hqd_pq_rptr; /* ordinal138 */
3401 uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */
3402 uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */
3403 uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */
3404 uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */
3405 uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */
3406 uint32_t cp_hqd_pq_wptr; /* ordinal144 */
3407 uint32_t cp_hqd_pq_control; /* ordinal145 */
3408 uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */
3409 uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */
3410 uint32_t cp_hqd_ib_rptr; /* ordinal148 */
3411 uint32_t cp_hqd_ib_control; /* ordinal149 */
3412 uint32_t cp_hqd_iq_timer; /* ordinal150 */
3413 uint32_t cp_hqd_iq_rptr; /* ordinal151 */
3414 uint32_t cp_hqd_dequeue_request; /* ordinal152 */
3415 uint32_t cp_hqd_dma_offload; /* ordinal153 */
3416 uint32_t cp_hqd_sema_cmd; /* ordinal154 */
3417 uint32_t cp_hqd_msg_type; /* ordinal155 */
3418 uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */
3419 uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */
3420 uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */
3421 uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */
3422 uint32_t cp_hqd_hq_status0; /* ordinal160 */
3423 uint32_t cp_hqd_hq_control0; /* ordinal161 */
3424 uint32_t cp_mqd_control; /* ordinal162 */
3425 uint32_t cp_hqd_hq_status1; /* ordinal163 */
3426 uint32_t cp_hqd_hq_control1; /* ordinal164 */
3427 uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */
3428 uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */
3429 uint32_t cp_hqd_eop_control; /* ordinal167 */
3430 uint32_t cp_hqd_eop_rptr; /* ordinal168 */
3431 uint32_t cp_hqd_eop_wptr; /* ordinal169 */
3432 uint32_t cp_hqd_eop_done_events; /* ordinal170 */
3433 uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */
3434 uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */
3435 uint32_t cp_hqd_ctx_save_control; /* ordinal173 */
3436 uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */
3437 uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */
3438 uint32_t cp_hqd_wg_state_offset; /* ordinal176 */
3439 uint32_t cp_hqd_ctx_save_size; /* ordinal177 */
3440 uint32_t cp_hqd_gds_resource_state; /* ordinal178 */
3441 uint32_t cp_hqd_error; /* ordinal179 */
3442 uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */
3443 uint32_t cp_hqd_eop_dones; /* ordinal181 */
3444 uint32_t reserved46; /* ordinal182 */
3445 uint32_t reserved47; /* ordinal183 */
3446 uint32_t reserved48; /* ordinal184 */
3447 uint32_t reserved49; /* ordinal185 */
3448 uint32_t reserved50; /* ordinal186 */
3449 uint32_t reserved51; /* ordinal187 */
3450 uint32_t reserved52; /* ordinal188 */
3451 uint32_t reserved53; /* ordinal189 */
3452 uint32_t reserved54; /* ordinal190 */
3453 uint32_t reserved55; /* ordinal191 */
3454 uint32_t iqtimer_pkt_header; /* ordinal192 */
3455 uint32_t iqtimer_pkt_dw0; /* ordinal193 */
3456 uint32_t iqtimer_pkt_dw1; /* ordinal194 */
3457 uint32_t iqtimer_pkt_dw2; /* ordinal195 */
3458 uint32_t iqtimer_pkt_dw3; /* ordinal196 */
3459 uint32_t iqtimer_pkt_dw4; /* ordinal197 */
3460 uint32_t iqtimer_pkt_dw5; /* ordinal198 */
3461 uint32_t iqtimer_pkt_dw6; /* ordinal199 */
3462 uint32_t iqtimer_pkt_dw7; /* ordinal200 */
3463 uint32_t iqtimer_pkt_dw8; /* ordinal201 */
3464 uint32_t iqtimer_pkt_dw9; /* ordinal202 */
3465 uint32_t iqtimer_pkt_dw10; /* ordinal203 */
3466 uint32_t iqtimer_pkt_dw11; /* ordinal204 */
3467 uint32_t iqtimer_pkt_dw12; /* ordinal205 */
3468 uint32_t iqtimer_pkt_dw13; /* ordinal206 */
3469 uint32_t iqtimer_pkt_dw14; /* ordinal207 */
3470 uint32_t iqtimer_pkt_dw15; /* ordinal208 */
3471 uint32_t iqtimer_pkt_dw16; /* ordinal209 */
3472 uint32_t iqtimer_pkt_dw17; /* ordinal210 */
3473 uint32_t iqtimer_pkt_dw18; /* ordinal211 */
3474 uint32_t iqtimer_pkt_dw19; /* ordinal212 */
3475 uint32_t iqtimer_pkt_dw20; /* ordinal213 */
3476 uint32_t iqtimer_pkt_dw21; /* ordinal214 */
3477 uint32_t iqtimer_pkt_dw22; /* ordinal215 */
3478 uint32_t iqtimer_pkt_dw23; /* ordinal216 */
3479 uint32_t iqtimer_pkt_dw24; /* ordinal217 */
3480 uint32_t iqtimer_pkt_dw25; /* ordinal218 */
3481 uint32_t iqtimer_pkt_dw26; /* ordinal219 */
3482 uint32_t iqtimer_pkt_dw27; /* ordinal220 */
3483 uint32_t iqtimer_pkt_dw28; /* ordinal221 */
3484 uint32_t iqtimer_pkt_dw29; /* ordinal222 */
3485 uint32_t iqtimer_pkt_dw30; /* ordinal223 */
3486 uint32_t iqtimer_pkt_dw31; /* ordinal224 */
3487 uint32_t reserved56; /* ordinal225 */
3488 uint32_t reserved57; /* ordinal226 */
3489 uint32_t reserved58; /* ordinal227 */
3490 uint32_t set_resources_header; /* ordinal228 */
3491 uint32_t set_resources_dw1; /* ordinal229 */
3492 uint32_t set_resources_dw2; /* ordinal230 */
3493 uint32_t set_resources_dw3; /* ordinal231 */
3494 uint32_t set_resources_dw4; /* ordinal232 */
3495 uint32_t set_resources_dw5; /* ordinal233 */
3496 uint32_t set_resources_dw6; /* ordinal234 */
3497 uint32_t set_resources_dw7; /* ordinal235 */
3498 uint32_t reserved59; /* ordinal236 */
3499 uint32_t reserved60; /* ordinal237 */
3500 uint32_t reserved61; /* ordinal238 */
3501 uint32_t reserved62; /* ordinal239 */
3502 uint32_t reserved63; /* ordinal240 */
3503 uint32_t reserved64; /* ordinal241 */
3504 uint32_t reserved65; /* ordinal242 */
3505 uint32_t reserved66; /* ordinal243 */
3506 uint32_t reserved67; /* ordinal244 */
3507 uint32_t reserved68; /* ordinal245 */
3508 uint32_t reserved69; /* ordinal246 */
3509 uint32_t reserved70; /* ordinal247 */
3510 uint32_t reserved71; /* ordinal248 */
3511 uint32_t reserved72; /* ordinal249 */
3512 uint32_t reserved73; /* ordinal250 */
3513 uint32_t reserved74; /* ordinal251 */
3514 uint32_t reserved75; /* ordinal252 */
3515 uint32_t reserved76; /* ordinal253 */
3516 uint32_t reserved77; /* ordinal254 */
3517 uint32_t reserved78; /* ordinal255 */
3519 uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
/*
 * gfx_v8_0_cp_compute_fini - tear down the per-ring compute MQD buffer objects.
 *
 * For every compute ring, unpin/unreference the MQD (memory queue descriptor)
 * BO if one was allocated by gfx_v8_0_cp_compute_resume(), and clear the
 * pointer so a later resume re-creates it.
 * NOTE(review): interior brace/declaration lines are elided in this extract;
 * code left byte-identical.
 */
3522 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3526 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3527 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3529 if (ring->mqd_obj) {
3530 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3531 if (unlikely(r != 0))
/* reserve failure is only warned about; teardown continues best-effort */
3532 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3534 amdgpu_bo_unpin(ring->mqd_obj);
3535 amdgpu_bo_unreserve(ring->mqd_obj);
3537 amdgpu_bo_unref(&ring->mqd_obj);
/* clear so the next resume path knows it must allocate a fresh MQD */
3538 ring->mqd_obj = NULL;
/*
 * gfx_v8_0_cp_compute_resume - bring up the compute queues (MEC HQDs).
 *
 * Two phases: (1) program the per-pipe EOP (end-of-pipe) buffers for all
 * MEC pipes under srbm_mutex, (2) for each compute ring allocate/map an MQD
 * BO, fill in the vi_mqd descriptor, and mirror each field into the HQD
 * registers while the queue's me/pipe is selected via vi_srbm_select().
 * Register write order follows the HW bring-up sequence — do not reorder.
 * NOTE(review): several declaration/error-return lines are elided in this
 * extract; code left byte-identical.
 */
3543 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
3547 bool use_doorbell = true;
3555 /* init the pipes */
3556 mutex_lock(&adev->srbm_mutex);
3557 for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
/* pipes 0-3 live on MEC1 (me=1), pipes 4-7 on MEC2 (me=2) */
3558 int me = (i < 4) ? 1 : 2;
3559 int pipe = (i < 4) ? i : (i - 4);
3561 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
3564 vi_srbm_select(adev, me, pipe, 0, 0);
3566 /* write the EOP addr */
3567 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
3568 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
3570 /* set the VMID assigned */
3571 WREG32(mmCP_HQD_VMID, 0);
3573 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3574 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
3575 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3576 (order_base_2(MEC_HPD_SIZE / 4) - 1));
3577 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
/* deselect before dropping the lock */
3579 vi_srbm_select(adev, 0, 0, 0, 0);
3580 mutex_unlock(&adev->srbm_mutex);
3582 /* init the queues. Just two for now. */
3583 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3584 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
/* lazily allocate the MQD BO; it survives across suspend/resume */
3586 if (ring->mqd_obj == NULL) {
3587 r = amdgpu_bo_create(adev,
3588 sizeof(struct vi_mqd),
3590 AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
3591 NULL, &ring->mqd_obj);
3593 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3598 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3599 if (unlikely(r != 0)) {
3600 gfx_v8_0_cp_compute_fini(adev);
3603 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
3606 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
3607 gfx_v8_0_cp_compute_fini(adev);
3610 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
3612 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
3613 gfx_v8_0_cp_compute_fini(adev);
3617 /* init the mqd struct */
3618 memset(buf, 0, sizeof(struct vi_mqd));
3620 mqd = (struct vi_mqd *)buf;
/* MQD header/static-thread-mgmt values: HW-defined magic constants */
3621 mqd->header = 0xC0310800;
3622 mqd->compute_pipelinestat_enable = 0x00000001;
3623 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3624 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3625 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3626 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3627 mqd->compute_misc_reserved = 0x00000003;
/* all HQD register writes below are against this ring's me/pipe/queue */
3629 mutex_lock(&adev->srbm_mutex);
3630 vi_srbm_select(adev, ring->me,
3634 /* disable wptr polling */
3635 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3636 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3637 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
/* capture the EOP base programmed in phase (1) into the MQD */
3639 mqd->cp_hqd_eop_base_addr_lo =
3640 RREG32(mmCP_HQD_EOP_BASE_ADDR);
3641 mqd->cp_hqd_eop_base_addr_hi =
3642 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
3644 /* enable doorbell? */
3645 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3647 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3649 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
3651 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
3652 mqd->cp_hqd_pq_doorbell_control = tmp;
3654 /* disable the queue if it's active */
3655 mqd->cp_hqd_dequeue_request = 0;
3656 mqd->cp_hqd_pq_rptr = 0;
3657 mqd->cp_hqd_pq_wptr= 0;
3658 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
3659 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
/* busy-wait for the HQD to drain, bounded by usec_timeout */
3660 for (j = 0; j < adev->usec_timeout; j++) {
3661 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
3665 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
3666 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
3667 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3670 /* set the pointer to the MQD */
3671 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
3672 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3673 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
3674 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3676 /* set MQD vmid to 0 */
3677 tmp = RREG32(mmCP_MQD_CONTROL);
3678 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3679 WREG32(mmCP_MQD_CONTROL, tmp);
3680 mqd->cp_mqd_control = tmp;
3682 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3683 hqd_gpu_addr = ring->gpu_addr >> 8;
3684 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3685 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3686 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
3687 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
3689 /* set up the HQD, this is similar to CP_RB0_CNTL */
3690 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
/* QUEUE_SIZE is log2 encoded, like the EOP size above */
3691 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3692 (order_base_2(ring->ring_size / 4) - 1));
3693 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3694 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3696 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3698 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3699 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3700 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3701 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3702 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
3703 mqd->cp_hqd_pq_control = tmp;
3705 /* set the wb address whether it's enabled or not */
3706 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3707 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3708 mqd->cp_hqd_pq_rptr_report_addr_hi =
3709 upper_32_bits(wb_gpu_addr) & 0xffff;
3710 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3711 mqd->cp_hqd_pq_rptr_report_addr_lo);
3712 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3713 mqd->cp_hqd_pq_rptr_report_addr_hi);
3715 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3716 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3717 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3718 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3719 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
3720 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3721 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3723 /* enable the doorbell if requested */
/* APU-class parts program the MEC doorbell aperture here */
3725 if ((adev->asic_type == CHIP_CARRIZO) ||
3726 (adev->asic_type == CHIP_FIJI) ||
3727 (adev->asic_type == CHIP_STONEY)) {
3728 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
3729 AMDGPU_DOORBELL_KIQ << 2);
3730 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
3731 AMDGPU_DOORBELL_MEC_RING7 << 2);
3733 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3734 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3735 DOORBELL_OFFSET, ring->doorbell_index);
3736 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3737 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
3738 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
3739 mqd->cp_hqd_pq_doorbell_control = tmp;
3742 mqd->cp_hqd_pq_doorbell_control = 0;
3744 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
3745 mqd->cp_hqd_pq_doorbell_control);
3747 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3749 mqd->cp_hqd_pq_wptr = ring->wptr;
3750 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3751 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3753 /* set the vmid for the queue */
3754 mqd->cp_hqd_vmid = 0;
3755 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3757 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
/* 0x53 preload size: HW-recommended value for VI */
3758 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3759 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
3760 mqd->cp_hqd_persistent_state = tmp;
3761 if (adev->asic_type == CHIP_STONEY) {
3762 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
3763 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
3764 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
3767 /* activate the queue */
3768 mqd->cp_hqd_active = 1;
3769 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
3771 vi_srbm_select(adev, 0, 0, 0, 0);
3772 mutex_unlock(&adev->srbm_mutex);
3774 amdgpu_bo_kunmap(ring->mqd_obj);
3775 amdgpu_bo_unreserve(ring->mqd_obj);
/* globally enable doorbells once all queues are programmed */
3779 tmp = RREG32(mmCP_PQ_STATUS);
3780 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3781 WREG32(mmCP_PQ_STATUS, tmp);
3784 r = gfx_v8_0_cp_compute_start(adev);
/* finally ring-test each compute ring; mark failures not-ready */
3788 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3789 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3792 r = amdgpu_ring_test_ring(ring);
3794 ring->ready = false;
/*
 * gfx_v8_0_cp_resume - load CP microcode and bring up GFX + compute rings.
 *
 * Without a powerplay/SMU loader the CE/PFP/ME/MEC firmwares are loaded
 * directly ("legacy"); otherwise the SMU loads them and we only poll for
 * completion. GUI-idle interrupts are masked on dGPUs during the bring-up.
 * NOTE(review): the per-step error-return lines are elided in this extract;
 * code left byte-identical.
 */
3800 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3804 if (!(adev->flags & AMD_IS_APU))
3805 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3807 if (!adev->pp_enabled) {
3808 if (!adev->firmware.smu_load) {
3809 /* legacy firmware loading */
3810 r = gfx_v8_0_cp_gfx_load_microcode(adev);
3814 r = gfx_v8_0_cp_compute_load_microcode(adev);
/* SMU-managed path: wait for each CP firmware image in turn */
3818 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3819 AMDGPU_UCODE_ID_CP_CE);
3823 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3824 AMDGPU_UCODE_ID_CP_PFP);
3828 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3829 AMDGPU_UCODE_ID_CP_ME);
3833 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3834 AMDGPU_UCODE_ID_CP_MEC1);
3840 r = gfx_v8_0_cp_gfx_resume(adev);
3844 r = gfx_v8_0_cp_compute_resume(adev);
/* re-enable GUI idle interrupts now that the CP is up */
3848 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/* Enable/disable both CP front-ends (GFX ME and compute MEC) together. */
3853 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
3855 gfx_v8_0_cp_gfx_enable(adev, enable);
3856 gfx_v8_0_cp_compute_enable(adev, enable);
/*
 * gfx_v8_0_hw_init - IP-block hw_init hook: golden registers, GPU config,
 * then RLC and CP bring-up, in that order.
 * NOTE(review): error-return lines between the calls are elided here.
 */
3859 static int gfx_v8_0_hw_init(void *handle)
3862 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3864 gfx_v8_0_init_golden_registers(adev);
3866 gfx_v8_0_gpu_init(adev);
3868 r = gfx_v8_0_rlc_resume(adev);
3872 r = gfx_v8_0_cp_resume(adev);
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini hook: stop CP and RLC, then free
 * the compute MQD buffers (reverse of hw_init/cp_compute_resume).
 */
3879 static int gfx_v8_0_hw_fini(void *handle)
3881 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3883 gfx_v8_0_cp_enable(adev, false);
3884 gfx_v8_0_rlc_stop(adev);
3885 gfx_v8_0_cp_compute_fini(adev);
/* IP-block suspend hook: suspend is a full hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini((struct amdgpu_device *)handle);
}
/* IP-block resume hook: resume is a full hardware re-init. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init((struct amdgpu_device *)handle);
}
/*
 * gfx_v8_0_is_idle - report GFX idleness from GRBM_STATUS.GUI_ACTIVE.
 * NOTE(review): the return statements are elided in this extract.
 */
3904 static bool gfx_v8_0_is_idle(void *handle)
3906 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3908 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
/*
 * gfx_v8_0_wait_for_idle - poll GRBM_STATUS.GUI_ACTIVE until clear,
 * bounded by adev->usec_timeout iterations.
 * NOTE(review): delay and timeout-return lines are elided in this extract.
 */
3914 static int gfx_v8_0_wait_for_idle(void *handle)
3918 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3920 for (i = 0; i < adev->usec_timeout; i++) {
3921 /* read MC_STATUS */
3922 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3924 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
/*
 * gfx_v8_0_print_status - debug dump of the GFX 8.x register state.
 *
 * Pure diagnostic: reads and dev_info()-prints GRBM/CP/RLC status, tiling
 * tables, per-SE raster config, ring-buffer registers, and the per-VMID
 * SH_MEM_* aperture registers (the latter under srbm_mutex). Called from
 * the soft-reset path before and after the reset.
 */
3931 static void gfx_v8_0_print_status(void *handle)
3934 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3936 dev_info(adev->dev, "GFX 8.x registers\n");
3937 dev_info(adev->dev, " GRBM_STATUS=0x%08X\n",
3938 RREG32(mmGRBM_STATUS));
3939 dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n",
3940 RREG32(mmGRBM_STATUS2));
3941 dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3942 RREG32(mmGRBM_STATUS_SE0));
3943 dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3944 RREG32(mmGRBM_STATUS_SE1));
3945 dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3946 RREG32(mmGRBM_STATUS_SE2));
3947 dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3948 RREG32(mmGRBM_STATUS_SE3));
3949 dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
3950 dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3951 RREG32(mmCP_STALLED_STAT1));
3952 dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3953 RREG32(mmCP_STALLED_STAT2));
3954 dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3955 RREG32(mmCP_STALLED_STAT3));
3956 dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3957 RREG32(mmCP_CPF_BUSY_STAT));
3958 dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3959 RREG32(mmCP_CPF_STALLED_STAT1));
3960 dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
3961 dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
3962 dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3963 RREG32(mmCP_CPC_STALLED_STAT1));
3964 dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
/* tiling mode tables */
3966 for (i = 0; i < 32; i++) {
3967 dev_info(adev->dev, " GB_TILE_MODE%d=0x%08X\n",
3968 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
3970 for (i = 0; i < 16; i++) {
3971 dev_info(adev->dev, " GB_MACROTILE_MODE%d=0x%08X\n",
3972 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
/* per-shader-engine raster config, selected via select_se_sh */
3974 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3975 dev_info(adev->dev, " se: %d\n", i);
3976 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
3977 dev_info(adev->dev, " PA_SC_RASTER_CONFIG=0x%08X\n",
3978 RREG32(mmPA_SC_RASTER_CONFIG));
3979 dev_info(adev->dev, " PA_SC_RASTER_CONFIG_1=0x%08X\n",
3980 RREG32(mmPA_SC_RASTER_CONFIG_1));
/* restore broadcast selection */
3982 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3984 dev_info(adev->dev, " GB_ADDR_CONFIG=0x%08X\n",
3985 RREG32(mmGB_ADDR_CONFIG));
3986 dev_info(adev->dev, " HDP_ADDR_CONFIG=0x%08X\n",
3987 RREG32(mmHDP_ADDR_CONFIG));
3988 dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n",
3989 RREG32(mmDMIF_ADDR_CALC));
3990 dev_info(adev->dev, " SDMA0_TILING_CONFIG=0x%08X\n",
3991 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
3992 dev_info(adev->dev, " SDMA1_TILING_CONFIG=0x%08X\n",
3993 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
3994 dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
3995 RREG32(mmUVD_UDEC_ADDR_CONFIG));
3996 dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
3997 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
3998 dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
3999 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
4001 dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n",
4002 RREG32(mmCP_MEQ_THRESHOLDS));
4003 dev_info(adev->dev, " SX_DEBUG_1=0x%08X\n",
4004 RREG32(mmSX_DEBUG_1));
4005 dev_info(adev->dev, " TA_CNTL_AUX=0x%08X\n",
4006 RREG32(mmTA_CNTL_AUX));
4007 dev_info(adev->dev, " SPI_CONFIG_CNTL=0x%08X\n",
4008 RREG32(mmSPI_CONFIG_CNTL));
4009 dev_info(adev->dev, " SQ_CONFIG=0x%08X\n",
4010 RREG32(mmSQ_CONFIG));
4011 dev_info(adev->dev, " DB_DEBUG=0x%08X\n",
4012 RREG32(mmDB_DEBUG));
4013 dev_info(adev->dev, " DB_DEBUG2=0x%08X\n",
4014 RREG32(mmDB_DEBUG2));
4015 dev_info(adev->dev, " DB_DEBUG3=0x%08X\n",
4016 RREG32(mmDB_DEBUG3));
4017 dev_info(adev->dev, " CB_HW_CONTROL=0x%08X\n",
4018 RREG32(mmCB_HW_CONTROL));
4019 dev_info(adev->dev, " SPI_CONFIG_CNTL_1=0x%08X\n",
4020 RREG32(mmSPI_CONFIG_CNTL_1));
4021 dev_info(adev->dev, " PA_SC_FIFO_SIZE=0x%08X\n",
4022 RREG32(mmPA_SC_FIFO_SIZE));
4023 dev_info(adev->dev, " VGT_NUM_INSTANCES=0x%08X\n",
4024 RREG32(mmVGT_NUM_INSTANCES));
4025 dev_info(adev->dev, " CP_PERFMON_CNTL=0x%08X\n",
4026 RREG32(mmCP_PERFMON_CNTL));
4027 dev_info(adev->dev, " PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
4028 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
4029 dev_info(adev->dev, " VGT_CACHE_INVALIDATION=0x%08X\n",
4030 RREG32(mmVGT_CACHE_INVALIDATION));
4031 dev_info(adev->dev, " VGT_GS_VERTEX_REUSE=0x%08X\n",
4032 RREG32(mmVGT_GS_VERTEX_REUSE));
4033 dev_info(adev->dev, " PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
4034 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
4035 dev_info(adev->dev, " PA_CL_ENHANCE=0x%08X\n",
4036 RREG32(mmPA_CL_ENHANCE));
4037 dev_info(adev->dev, " PA_SC_ENHANCE=0x%08X\n",
4038 RREG32(mmPA_SC_ENHANCE));
4040 dev_info(adev->dev, " CP_ME_CNTL=0x%08X\n",
4041 RREG32(mmCP_ME_CNTL));
4042 dev_info(adev->dev, " CP_MAX_CONTEXT=0x%08X\n",
4043 RREG32(mmCP_MAX_CONTEXT));
4044 dev_info(adev->dev, " CP_ENDIAN_SWAP=0x%08X\n",
4045 RREG32(mmCP_ENDIAN_SWAP));
4046 dev_info(adev->dev, " CP_DEVICE_ID=0x%08X\n",
4047 RREG32(mmCP_DEVICE_ID));
4049 dev_info(adev->dev, " CP_SEM_WAIT_TIMER=0x%08X\n",
4050 RREG32(mmCP_SEM_WAIT_TIMER));
4052 dev_info(adev->dev, " CP_RB_WPTR_DELAY=0x%08X\n",
4053 RREG32(mmCP_RB_WPTR_DELAY));
4054 dev_info(adev->dev, " CP_RB_VMID=0x%08X\n",
4055 RREG32(mmCP_RB_VMID));
4056 dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
4057 RREG32(mmCP_RB0_CNTL));
4058 dev_info(adev->dev, " CP_RB0_WPTR=0x%08X\n",
4059 RREG32(mmCP_RB0_WPTR));
4060 dev_info(adev->dev, " CP_RB0_RPTR_ADDR=0x%08X\n",
4061 RREG32(mmCP_RB0_RPTR_ADDR));
4062 dev_info(adev->dev, " CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4063 RREG32(mmCP_RB0_RPTR_ADDR_HI));
4064 dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
4065 RREG32(mmCP_RB0_CNTL));
4066 dev_info(adev->dev, " CP_RB0_BASE=0x%08X\n",
4067 RREG32(mmCP_RB0_BASE));
4068 dev_info(adev->dev, " CP_RB0_BASE_HI=0x%08X\n",
4069 RREG32(mmCP_RB0_BASE_HI));
4070 dev_info(adev->dev, " CP_MEC_CNTL=0x%08X\n",
4071 RREG32(mmCP_MEC_CNTL));
4072 dev_info(adev->dev, " CP_CPF_DEBUG=0x%08X\n",
4073 RREG32(mmCP_CPF_DEBUG));
4075 dev_info(adev->dev, " SCRATCH_ADDR=0x%08X\n",
4076 RREG32(mmSCRATCH_ADDR));
4077 dev_info(adev->dev, " SCRATCH_UMSK=0x%08X\n",
4078 RREG32(mmSCRATCH_UMSK));
4080 dev_info(adev->dev, " CP_INT_CNTL_RING0=0x%08X\n",
4081 RREG32(mmCP_INT_CNTL_RING0));
4082 dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
4083 RREG32(mmRLC_LB_CNTL));
4084 dev_info(adev->dev, " RLC_CNTL=0x%08X\n",
4085 RREG32(mmRLC_CNTL));
4086 dev_info(adev->dev, " RLC_CGCG_CGLS_CTRL=0x%08X\n",
4087 RREG32(mmRLC_CGCG_CGLS_CTRL));
4088 dev_info(adev->dev, " RLC_LB_CNTR_INIT=0x%08X\n",
4089 RREG32(mmRLC_LB_CNTR_INIT));
4090 dev_info(adev->dev, " RLC_LB_CNTR_MAX=0x%08X\n",
4091 RREG32(mmRLC_LB_CNTR_MAX));
4092 dev_info(adev->dev, " RLC_LB_INIT_CU_MASK=0x%08X\n",
4093 RREG32(mmRLC_LB_INIT_CU_MASK));
4094 dev_info(adev->dev, " RLC_LB_PARAMS=0x%08X\n",
4095 RREG32(mmRLC_LB_PARAMS));
4096 dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
4097 RREG32(mmRLC_LB_CNTL));
4098 dev_info(adev->dev, " RLC_MC_CNTL=0x%08X\n",
4099 RREG32(mmRLC_MC_CNTL));
4100 dev_info(adev->dev, " RLC_UCODE_CNTL=0x%08X\n",
4101 RREG32(mmRLC_UCODE_CNTL));
/* dump the SH_MEM apertures for all 16 VMIDs under srbm_mutex */
4103 mutex_lock(&adev->srbm_mutex);
4104 for (i = 0; i < 16; i++) {
4105 vi_srbm_select(adev, 0, 0, 0, i);
4106 dev_info(adev->dev, " VM %d:\n", i);
4107 dev_info(adev->dev, " SH_MEM_CONFIG=0x%08X\n",
4108 RREG32(mmSH_MEM_CONFIG));
4109 dev_info(adev->dev, " SH_MEM_APE1_BASE=0x%08X\n",
4110 RREG32(mmSH_MEM_APE1_BASE));
4111 dev_info(adev->dev, " SH_MEM_APE1_LIMIT=0x%08X\n",
4112 RREG32(mmSH_MEM_APE1_LIMIT));
4113 dev_info(adev->dev, " SH_MEM_BASES=0x%08X\n",
4114 RREG32(mmSH_MEM_BASES));
4116 vi_srbm_select(adev, 0, 0, 0, 0);
4117 mutex_unlock(&adev->srbm_mutex);
/*
 * gfx_v8_0_soft_reset - IP-block soft_reset hook for the GFX block.
 *
 * Decodes GRBM/SRBM status into GRBM_SOFT_RESET / SRBM_SOFT_RESET bit
 * masks, then (if anything is busy) stops RLC/CP and pulses the reset
 * bits: write mask, read back, settle, clear mask, read back. The
 * read-backs after each write post the register write.
 * NOTE(review): udelay/settle lines are elided in this extract; code left
 * byte-identical.
 */
4120 static int gfx_v8_0_soft_reset(void *handle)
4122 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4124 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* GRBM_STATUS: any busy GFX engine block => reset CP + GFX */
4127 tmp = RREG32(mmGRBM_STATUS);
4128 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4129 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4130 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4131 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4132 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4133 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4134 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4135 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4136 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4137 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
/* CP busy also requires a GRBM reset via SRBM */
4140 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4141 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4142 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4143 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4144 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4148 tmp = RREG32(mmGRBM_STATUS2);
4149 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4150 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4151 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4154 tmp = RREG32(mmSRBM_STATUS);
4155 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4156 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4157 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4159 if (grbm_soft_reset || srbm_soft_reset) {
/* dump state before the reset for diagnosis */
4160 gfx_v8_0_print_status((void *)adev);
4162 gfx_v8_0_rlc_stop(adev);
4164 /* Disable GFX parsing/prefetching */
4165 gfx_v8_0_cp_gfx_enable(adev, false);
4167 /* Disable MEC parsing/prefetching */
4170 if (grbm_soft_reset) {
4171 tmp = RREG32(mmGRBM_SOFT_RESET);
4172 tmp |= grbm_soft_reset;
4173 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4174 WREG32(mmGRBM_SOFT_RESET, tmp);
/* read-back posts the write before the settle delay */
4175 tmp = RREG32(mmGRBM_SOFT_RESET);
4179 tmp &= ~grbm_soft_reset;
4180 WREG32(mmGRBM_SOFT_RESET, tmp);
4181 tmp = RREG32(mmGRBM_SOFT_RESET);
4184 if (srbm_soft_reset) {
4185 tmp = RREG32(mmSRBM_SOFT_RESET);
4186 tmp |= srbm_soft_reset;
4187 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4188 WREG32(mmSRBM_SOFT_RESET, tmp);
4189 tmp = RREG32(mmSRBM_SOFT_RESET);
4193 tmp &= ~srbm_soft_reset;
4194 WREG32(mmSRBM_SOFT_RESET, tmp);
4195 tmp = RREG32(mmSRBM_SOFT_RESET);
4197 /* Wait a little for things to settle down */
/* dump state again after the reset for comparison */
4199 gfx_v8_0_print_status((void *)adev);
4205 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4207 * @adev: amdgpu_device pointer
4209 * Fetches a GPU clock counter snapshot.
4210 * Returns the 64 bit clock counter snapshot.
4212 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
/* serialize capture so the latched LSB/MSB pair stays consistent */
4216 mutex_lock(&adev->gfx.gpu_clock_mutex)
4217 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
/* writing CAPTURE latches the counter; then read LSB and MSB halves */
4218 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4219 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4220 mutex_unlock(&adev->gfx.gpu_clock_mutex);
/*
 * gfx_v8_0_ring_emit_gds_switch - emit the per-VMID GDS/GWS/OA setup.
 *
 * Converts the byte-granular base/size arguments into HW units via the
 * AMDGPU_*_SHIFT constants, then emits four WRITE_DATA packets that program
 * the per-VMID GDS memory base/size, GWS allocation, and OA mask registers.
 */
4224 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4226 uint32_t gds_base, uint32_t gds_size,
4227 uint32_t gws_base, uint32_t gws_size,
4228 uint32_t oa_base, uint32_t oa_size)
/* scale byte values into GDS/GWS/OA register units */
4230 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4231 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4233 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4234 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4236 oa_base = oa_base >> AMDGPU_OA_SHIFT;
4237 oa_size = oa_size >> AMDGPU_OA_SHIFT;
/* GDS base for this vmid */
4240 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4241 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4242 WRITE_DATA_DST_SEL(0)));
4243 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4244 amdgpu_ring_write(ring, 0);
4245 amdgpu_ring_write(ring, gds_base);
/* GDS size for this vmid */
4248 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4249 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4250 WRITE_DATA_DST_SEL(0)));
4251 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4252 amdgpu_ring_write(ring, 0);
4253 amdgpu_ring_write(ring, gds_size);
/* GWS: size in the upper field, base in the lower */
4256 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4257 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4258 WRITE_DATA_DST_SEL(0)));
4259 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4260 amdgpu_ring_write(ring, 0);
4261 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
/* OA: build a contiguous bit mask of oa_size bits starting at oa_base */
4264 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4265 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4266 WRITE_DATA_DST_SEL(0)));
4267 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4268 amdgpu_ring_write(ring, 0);
4269 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
/*
 * gfx_v8_0_early_init - IP-block early_init hook: fix ring counts and
 * install the ring/irq/GDS function tables before any hardware touch.
 */
4272 static int gfx_v8_0_early_init(void *handle)
4274 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4276 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
4277 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
4278 gfx_v8_0_set_ring_funcs(adev);
4279 gfx_v8_0_set_irq_funcs(adev);
4280 gfx_v8_0_set_gds_init(adev);
/*
 * gfx_v8_0_late_init - IP-block late_init hook: run the EDC GPR
 * workarounds, which need IBs and therefore the initialized IB pool.
 */
4285 static int gfx_v8_0_late_init(void *handle)
4287 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4290 /* requires IBs so do in late init after IB pool is initialized */
4291 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
/* Powergating hook: no-op stub for GFX v8 (body elided in this extract). */
4298 static int gfx_v8_0_set_powergating_state(void *handle,
4299 enum amd_powergating_state state)
/* Clockgating hook: no-op stub for GFX v8 (body elided in this extract). */
4304 static int gfx_v8_0_set_clockgating_state(void *handle,
4305 enum amd_clockgating_state state)
/* Read the GFX ring read-pointer from its write-back slot in system memory. */
4310 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4314 rptr = ring->adev->wb.wb[ring->rptr_offs];
4321 struct amdgpu_device *adev = ring->adev;
4324 if (ring->use_doorbell)
4325 /* XXX check if swapping is necessary on BE */
4326 wptr = ring->adev->wb.wb[ring->wptr_offs];
4328 wptr = RREG32(mmCP_RB0_WPTR);
/*
 * Commit the GFX ring write-pointer: via write-back slot + doorbell when
 * doorbells are in use, otherwise via CP_RB0_WPTR with a read-back to
 * post the write.
 */
4333 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4335 struct amdgpu_device *adev = ring->adev;
4337 if (ring->use_doorbell) {
4338 /* XXX check if swapping is necessary on BE */
4339 adev->wb.wb[ring->wptr_offs] = ring->wptr;
4340 WDOORBELL32(ring->doorbell_index, ring->wptr);
4342 WREG32(mmCP_RB0_WPTR, ring->wptr);
/* read back to flush the posted register write */
4343 (void)RREG32(mmCP_RB0_WPTR);
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on a GFX or compute ring.
 *
 * Picks the per-pipe GPU_HDP_FLUSH_DONE bit for the requesting CP client,
 * then emits a WAIT_REG_MEM packet in write-request/wait-for-done mode
 * against the GPU_HDP_FLUSH_REQ/DONE register pair.
 * NOTE(review): the branch selecting between CP2/CP6 groups has elided
 * lines here; code left byte-identical.
 */
4347 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4349 u32 ref_and_mask, reg_mem_engine;
4351 if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
4354 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
4357 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
/* GFX ring: use CP0 and wait on the PFP engine */
4364 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
4365 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
4368 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4369 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
4370 WAIT_REG_MEM_FUNCTION(3) | /* == */
4372 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
4373 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
4374 amdgpu_ring_write(ring, ref_and_mask);
4375 amdgpu_ring_write(ring, ref_and_mask);
4376 amdgpu_ring_write(ring, 0x20); /* poll interval */
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the GFX ring.
 *
 * Writes the next_rptr to the ring's write-back area, optionally emits a
 * SWITCH_BUFFER for a context switch, then emits the INDIRECT_BUFFER
 * (or INDIRECT_BUFFER_CONST for CE IBs) packet with the IB address,
 * length, and VM id in the control word.
 */
4379 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4380 struct amdgpu_ib *ib)
/* a new fence context means we must switch GPU context state */
4382 bool need_ctx_switch = ring->current_ctx != ib->ctx;
4383 u32 header, control = 0;
4384 u32 next_rptr = ring->wptr + 5;
4386 /* drop the CE preamble IB for the same context */
4387 if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
4390 if (need_ctx_switch)
/* publish next_rptr so the scheduler can track ring progress */
4394 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4395 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4396 amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4397 amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4398 amdgpu_ring_write(ring, next_rptr);
4400 /* insert SWITCH_BUFFER packet before first IB in the ring frame */
4401 if (need_ctx_switch) {
4402 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4403 amdgpu_ring_write(ring, 0);
4406 if (ib->flags & AMDGPU_IB_FLAG_CE)
4407 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4409 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
/* control word: dword length plus the VM id in bits 24+ */
4411 control |= ib->length_dw |
4412 (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
4414 amdgpu_ring_write(ring, header);
4415 amdgpu_ring_write(ring,
4419 (ib->gpu_addr & 0xFFFFFFFC));
4420 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4421 amdgpu_ring_write(ring, control);
/*
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring.
 *
 * Simpler than the GFX variant: no context switch or CE handling, just the
 * next_rptr write-back followed by an INDIRECT_BUFFER packet.
 */
4424 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4425 struct amdgpu_ib *ib)
4427 u32 header, control = 0;
4428 u32 next_rptr = ring->wptr + 5;
4430 control |= INDIRECT_BUFFER_VALID;
/* publish next_rptr so the scheduler can track ring progress */
4433 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4434 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4435 amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4436 amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4437 amdgpu_ring_write(ring, next_rptr);
4439 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
/* control word: dword length plus the VM id in bits 24+ */
4441 control |= ib->length_dw |
4442 (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
4444 amdgpu_ring_write(ring, header);
4445 amdgpu_ring_write(ring,
4449 (ib->gpu_addr & 0xFFFFFFFC));
4450 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4451 amdgpu_ring_write(ring, control);
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the GFX ring.
 *
 * Emits EVENT_WRITE_EOP with cache flush-and-invalidate, writing the
 * 32- or 64-bit @seq to @addr and optionally raising an interrupt,
 * depending on @flags.
 */
4454 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
4455 u64 seq, unsigned flags)
4457 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4458 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4460 /* EVENT_WRITE_EOP - flush caches, send int */
4461 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
4462 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4464 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
/* DATA_SEL: 2 = 64-bit value, 1 = 32-bit; INT_SEL: 2 = irq on write */
4466 amdgpu_ring_write(ring, addr & 0xfffffffc);
4467 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
4468 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4469 amdgpu_ring_write(ring, lower_32_bits(seq));
4470 amdgpu_ring_write(ring, upper_32_bits(seq));
4475 * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring
4477 * @ring: amdgpu ring buffer object
4478 * @semaphore: amdgpu semaphore object
4479 * @emit_wait: Is this a semaphore wait?
4481 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4482 * from running ahead of semaphore waits.
4484 static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
4485 struct amdgpu_semaphore *semaphore,
4488 uint64_t addr = semaphore->gpu_addr;
4489 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
/* affected ASICs must fall back to SW fence waits (HW semaphore bug) */
4491 if (ring->adev->asic_type == CHIP_TOPAZ ||
4492 ring->adev->asic_type == CHIP_TONGA ||
4493 ring->adev->asic_type == CHIP_FIJI)
4494 /* we got a hw semaphore bug in VI TONGA, return false to switch back to sw fence wait */
4497 amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 2));
4498 amdgpu_ring_write(ring, lower_32_bits(addr));
4499 amdgpu_ring_write(ring, upper_32_bits(addr));
4500 amdgpu_ring_write(ring, sel);
4503 if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
4504 /* Prevent the PFP from running ahead of the semaphore wait */
4505 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4506 amdgpu_ring_write(ring, 0x0);
/*
 * gfx_v8_0_ring_emit_vm_flush - update a VMID's page directory and flush its TLB
 *
 * Waits (WAIT_REG_MEM on memory, function "equal") for this ring's last
 * emitted fence, writes the new page directory base for @vm_id, requests a
 * VM cache invalidate for that VMID and polls VM_INVALIDATE_REQUEST until
 * the invalidate completes.  On the GFX ring the PFP is re-synced with the
 * ME afterwards and SWITCH_BUFFER packets are emitted.
 */
4512 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4513 unsigned vm_id, uint64_t pd_addr)
4515 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
4516 uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
4517 uint64_t addr = ring->fence_drv.gpu_addr;
/* wait for the last fence of this ring before touching the page tables */
4519 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4520 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
4521 WAIT_REG_MEM_FUNCTION(3))); /* equal */
4522 amdgpu_ring_write(ring, addr & 0xfffffffc);
4523 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4524 amdgpu_ring_write(ring, seq);
4525 amdgpu_ring_write(ring, 0xffffffff);
4526 amdgpu_ring_write(ring, 4); /* poll interval */
4529 /* sync CE with ME so the CE does not fetch the CEIB before the context switch is done */
4530 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4531 amdgpu_ring_write(ring, 0);
4532 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4533 amdgpu_ring_write(ring, 0);
4536 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4537 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
4538 WRITE_DATA_DST_SEL(0)) |
/* NOTE(review): the vm_id < 8 conditional selecting between the
 * VM_CONTEXT0..7 and VM_CONTEXT8..15 register banks is elided in this
 * extract - only one of the two register writes below executes. */
4541 amdgpu_ring_write(ring,
4542 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
4544 amdgpu_ring_write(ring,
4545 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
4547 amdgpu_ring_write(ring, 0);
4548 amdgpu_ring_write(ring, pd_addr >> 12);
4550 /* bits 0-15 are the VM contexts0-15 */
4551 /* invalidate the cache */
4552 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4553 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4554 WRITE_DATA_DST_SEL(0)));
4555 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4556 amdgpu_ring_write(ring, 0);
4557 amdgpu_ring_write(ring, 1 << vm_id);
4559 /* wait for the invalidate to complete */
4560 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4561 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
4562 WAIT_REG_MEM_FUNCTION(0) | /* always */
4563 WAIT_REG_MEM_ENGINE(0))); /* me */
4564 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4565 amdgpu_ring_write(ring, 0);
4566 amdgpu_ring_write(ring, 0); /* ref */
4567 amdgpu_ring_write(ring, 0); /* mask */
4568 amdgpu_ring_write(ring, 0x20); /* poll interval */
4570 /* compute doesn't have PFP */
4572 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4573 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4574 amdgpu_ring_write(ring, 0x0);
4575 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4576 amdgpu_ring_write(ring, 0);
4577 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4578 amdgpu_ring_write(ring, 0);
4582 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4584 return ring->adev->wb.wb[ring->rptr_offs];
4587 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4589 return ring->adev->wb.wb[ring->wptr_offs];
4592 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4594 struct amdgpu_device *adev = ring->adev;
4596 /* XXX check if swapping is necessary on BE */
4597 adev->wb.wb[ring->wptr_offs] = ring->wptr;
4598 WDOORBELL32(ring->doorbell_index, ring->wptr);
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute (MEC) ring
 *
 * Compute queues use the RELEASE_MEM packet rather than EVENT_WRITE_EOP:
 * the CP flushes/writes back caches and then writes the sequence number
 * (32 or 64 bit per AMDGPU_FENCE_FLAG_64BIT) to the fence address,
 * optionally raising an interrupt (AMDGPU_FENCE_FLAG_INT).
 * NOTE(review): the signature continuation lines (u64 addr, u64 seq,
 * unsigned flags) are elided in this extract.
 */
4601 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
4605 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4606 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4608 /* RELEASE_MEM - flush caches, send int */
4609 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4610 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4612 EOP_TC_WB_ACTION_EN |
4613 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
/* DATA_SEL: 2 = write 64-bit data, 1 = write 32-bit data; INT_SEL: 2 when an interrupt is requested */
4615 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4616 amdgpu_ring_write(ring, addr & 0xfffffffc);
4617 amdgpu_ring_write(ring, upper_32_bits(addr));
4618 amdgpu_ring_write(ring, lower_32_bits(seq));
4619 amdgpu_ring_write(ring, upper_32_bits(seq));
/*
 * Enable or disable the end-of-pipe (timestamp) interrupt for the GFX
 * ring by read-modify-writing TIME_STAMP_INT_ENABLE in CP_INT_CNTL_RING0.
 */
4622 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4623 enum amdgpu_interrupt_state state)
4628 case AMDGPU_IRQ_STATE_DISABLE:
4629 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4630 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4631 TIME_STAMP_INT_ENABLE, 0);
4632 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4634 case AMDGPU_IRQ_STATE_ENABLE:
4635 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4637 REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4638 TIME_STAMP_INT_ENABLE, 1);
4639 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
/*
 * Enable or disable the EOP (timestamp) interrupt for a compute pipe by
 * read-modify-writing TIME_STAMP_INT_ENABLE in the pipe's CP_MEx_PIPEy
 * interrupt control register.  Invalid me/pipe combinations are rejected
 * with a debug message (only MEC1 pipe 0 is handled here - see below).
 */
4646 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4648 enum amdgpu_interrupt_state state)
4650 u32 mec_int_cntl, mec_int_cntl_reg;
4653 * amdgpu controls only pipe 0 of MEC1. That's why this function only
4654 * handles the setting of interrupts for this specific pipe. All other
4655 * pipes' interrupts are set by amdkfd.
4661 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4664 DRM_DEBUG("invalid pipe %d\n", pipe);
4668 DRM_DEBUG("invalid me %d\n", me);
4673 case AMDGPU_IRQ_STATE_DISABLE:
4674 mec_int_cntl = RREG32(mec_int_cntl_reg);
4675 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4676 TIME_STAMP_INT_ENABLE, 0);
4677 WREG32(mec_int_cntl_reg, mec_int_cntl);
4679 case AMDGPU_IRQ_STATE_ENABLE:
4680 mec_int_cntl = RREG32(mec_int_cntl_reg);
4681 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4682 TIME_STAMP_INT_ENABLE, 1);
4683 WREG32(mec_int_cntl_reg, mec_int_cntl);
4690 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4691 struct amdgpu_irq_src *source,
4693 enum amdgpu_interrupt_state state)
4698 case AMDGPU_IRQ_STATE_DISABLE:
4699 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4700 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4701 PRIV_REG_INT_ENABLE, 0);
4702 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4704 case AMDGPU_IRQ_STATE_ENABLE:
4705 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4706 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4707 PRIV_REG_INT_ENABLE, 0);
4708 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
/*
 * Enable or disable the CP interrupt raised on an illegal (privileged)
 * instruction in the command stream, via PRIV_INSTR_INT_ENABLE in
 * CP_INT_CNTL_RING0 (0 on disable, 1 on enable).
 */
4717 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4718 struct amdgpu_irq_src *source,
4720 enum amdgpu_interrupt_state state)
4725 case AMDGPU_IRQ_STATE_DISABLE:
4726 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4727 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4728 PRIV_INSTR_INT_ENABLE, 0);
4729 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4731 case AMDGPU_IRQ_STATE_ENABLE:
4732 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4733 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4734 PRIV_INSTR_INT_ENABLE, 1);
4735 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
/*
 * Dispatch an EOP interrupt enable/disable request to the right engine:
 * the GFX ring for AMDGPU_CP_IRQ_GFX_EOP, or the matching (MEC, pipe)
 * pair for the compute EOP interrupt types.
 */
4744 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4745 struct amdgpu_irq_src *src,
4747 enum amdgpu_interrupt_state state)
4750 case AMDGPU_CP_IRQ_GFX_EOP:
4751 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
4753 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4754 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4756 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4757 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4759 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4760 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4762 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4763 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4765 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4766 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4768 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4769 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4771 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4772 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4774 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4775 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
/*
 * gfx_v8_0_eop_irq - EOP interrupt handler
 *
 * Decodes the IV ring_id into ME (bits 3:2), pipe (bits 1:0) and queue
 * (bits 6:4), then processes fences on the GFX ring or on the matching
 * compute ring.
 */
4783 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
4784 struct amdgpu_irq_src *source,
4785 struct amdgpu_iv_entry *entry)
4788 u8 me_id, pipe_id, queue_id;
4789 struct amdgpu_ring *ring;
4791 DRM_DEBUG("IH: CP EOP\n");
4792 me_id = (entry->ring_id & 0x0c) >> 2;
4793 pipe_id = (entry->ring_id & 0x03) >> 0;
4794 queue_id = (entry->ring_id & 0x70) >> 4;
/* NOTE(review): the switch on me_id is elided in this extract -
 * the gfx_ring case and the compute-ring loop are alternatives. */
4798 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4802 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4803 ring = &adev->gfx.compute_ring[i];
4804 /* Per-queue interrupt is supported for MEC starting from VI.
4805 * The interrupt can only be enabled/disabled per pipe instead of per queue.
4807 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
4808 amdgpu_fence_process(ring);
4815 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
4816 struct amdgpu_irq_src *source,
4817 struct amdgpu_iv_entry *entry)
4819 DRM_ERROR("Illegal register access in command stream\n");
4820 schedule_work(&adev->reset_work);
4824 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
4825 struct amdgpu_irq_src *source,
4826 struct amdgpu_iv_entry *entry)
4828 DRM_ERROR("Illegal instruction in command stream\n");
4829 schedule_work(&adev->reset_work);
/* IP-block callbacks wiring the GFX v8 engine into the amdgpu IP framework
 * (init/teardown, suspend/resume, idle/reset and gating control). */
4833 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
4834 .early_init = gfx_v8_0_early_init,
4835 .late_init = gfx_v8_0_late_init,
4836 .sw_init = gfx_v8_0_sw_init,
4837 .sw_fini = gfx_v8_0_sw_fini,
4838 .hw_init = gfx_v8_0_hw_init,
4839 .hw_fini = gfx_v8_0_hw_fini,
4840 .suspend = gfx_v8_0_suspend,
4841 .resume = gfx_v8_0_resume,
4842 .is_idle = gfx_v8_0_is_idle,
4843 .wait_for_idle = gfx_v8_0_wait_for_idle,
4844 .soft_reset = gfx_v8_0_soft_reset,
4845 .print_status = gfx_v8_0_print_status,
4846 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
4847 .set_powergating_state = gfx_v8_0_set_powergating_state,
/* Ring callbacks for the GFX ring (EOP fence via EVENT_WRITE_EOP). */
4850 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
4851 .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
4852 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
4853 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
4855 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
4856 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
4857 .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
4858 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
4859 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
4860 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
4861 .test_ring = gfx_v8_0_ring_test_ring,
4862 .test_ib = gfx_v8_0_ring_test_ib,
4863 .insert_nop = amdgpu_ring_insert_nop,
/* Ring callbacks for the MEC compute rings (doorbell wptr, RELEASE_MEM fence). */
4866 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
4867 .get_rptr = gfx_v8_0_ring_get_rptr_compute,
4868 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
4869 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
4871 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
4872 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
4873 .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
4874 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
4875 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
4876 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
4877 .test_ring = gfx_v8_0_ring_test_ring,
4878 .test_ib = gfx_v8_0_ring_test_ib,
4879 .insert_nop = amdgpu_ring_insert_nop,
4882 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
4886 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4887 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
4889 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4890 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
/* EOP interrupt source: state toggling + fence processing. */
4893 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
4894 .set = gfx_v8_0_set_eop_interrupt_state,
4895 .process = gfx_v8_0_eop_irq,
/* Privileged-register fault source: toggling + reset scheduling. */
4898 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
4899 .set = gfx_v8_0_set_priv_reg_fault_state,
4900 .process = gfx_v8_0_priv_reg_irq,
/* Illegal-instruction fault source: toggling + reset scheduling. */
4903 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
4904 .set = gfx_v8_0_set_priv_inst_fault_state,
4905 .process = gfx_v8_0_priv_inst_irq,
4908 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
4910 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
4911 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
4913 adev->gfx.priv_reg_irq.num_types = 1;
4914 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
4916 adev->gfx.priv_inst_irq.num_types = 1;
4917 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
/*
 * gfx_v8_0_set_gds_init - initialize GDS/GWS/OA sizing
 *
 * Reads the total GDS memory size from GDS_VMID0_SIZE and derives the
 * per-partition sizes for the gfx and CS clients; the split differs for
 * the 64KB configuration versus larger GDS sizes.
 */
4920 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
4922 /* init asic gds info */
4923 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
4924 adev->gds.gws.total_size = 64;
4925 adev->gds.oa.total_size = 16;
4927 if (adev->gds.mem.total_size == 64 * 1024) {
4928 adev->gds.mem.gfx_partition_size = 4096;
4929 adev->gds.mem.cs_partition_size = 4096;
4931 adev->gds.gws.gfx_partition_size = 4;
4932 adev->gds.gws.cs_partition_size = 4;
4934 adev->gds.oa.gfx_partition_size = 4;
4935 adev->gds.oa.cs_partition_size = 1;
/* NOTE(review): the else branch header is elided in this extract -
 * the assignments below apply when total_size != 64KB. */
4937 adev->gds.mem.gfx_partition_size = 1024;
4938 adev->gds.mem.cs_partition_size = 1024;
4940 adev->gds.gws.gfx_partition_size = 16;
4941 adev->gds.gws.cs_partition_size = 16;
4943 adev->gds.oa.gfx_partition_size = 4;
4944 adev->gds.oa.cs_partition_size = 4;
/*
 * gfx_v8_0_get_cu_active_bitmap - read the active-CU bitmap for one SE/SH
 *
 * Selects the given shader engine/array via GRBM, reads the fixed and
 * user shader-array config registers, restores broadcast selection
 * (0xffffffff), then returns the active-CU bits: the registers hold
 * inactive-CU bits, hence the (~tmp) & mask at the end.
 * NOTE(review): the signature continuation (u32 se, u32 sh) and the
 * mask-building loop body are elided in this extract.
 */
4948 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev,
4951 u32 mask = 0, tmp, tmp1;
4954 gfx_v8_0_select_se_sh(adev, se, sh);
4955 tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
4956 tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
4957 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4964 for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
4969 return (~tmp) & mask;
4972 int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
4973 struct amdgpu_cu_info *cu_info)
4975 int i, j, k, counter, active_cu_number = 0;
4976 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
4978 if (!adev || !cu_info)
4981 mutex_lock(&adev->grbm_idx_mutex);
4982 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
4983 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
4987 bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j);
4988 cu_info->bitmap[i][j] = bitmap;
4990 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
4991 if (bitmap & mask) {
4998 active_cu_number += counter;
4999 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5003 cu_info->number = active_cu_number;
5004 cu_info->ao_cu_mask = ao_cu_mask;
5005 mutex_unlock(&adev->grbm_idx_mutex);