/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS 1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

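/* PWR_MISC_CNTL_STATUS is not in the imported register headers, so its
 * offset and fields are defined locally; the fields report whether RLC
 * CGPG is enabled and the current GFXOFF status.
 */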
#define mmPWR_MISC_CNTL_STATUS 0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L

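/* MODULE_FIRMWARE() records every firmware image this driver may request,
 * so initramfs tooling knows to bundle the files alongside the module.
 */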
MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

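/* Arcturus TCP channel-steering registers, defined locally because they
 * are not present in the generated gc_9_0 register headers.
 */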
#define mmTCP_CHAN_STEER_0_ARCT 0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_1_ARCT 0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_2_ARCT 0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_3_ARCT 0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_4_ARCT 0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0

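/* Sub-block indices as understood by the RAS trusted application (TA).
 * The *_INDEX_START/*_INDEX_END members bracket each hardware block's
 * sub-range so an index can be mapped back to its parent block.
 */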
enum ta_ras_gfx_subblock {
	/*CPC*/
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF*/
	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG*/
	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS*/
	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI*/
	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges)*/
	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0*/
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)	\
	[AMDGPU_RAS_BLOCK__##subblock] = {			\
		#subblock,					\
		TA_RAS_BLOCK__##subblock,			\
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),	\
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),	\
	}

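/* Arguments a..d pack the hardware-supported error types into bits 0..3
 * of the first word, and e..h the software-supported ones (note the
 * shuffled bit order in the second word); the bit positions presumably
 * mirror the AMDGPU_RAS_ERROR__* type bits declared in amdgpu_ras.h.
 */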
static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

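/* "Golden" register settings: each entry clears the bits in the first
 * mask and ORs in the second value, overriding power-on register
 * defaults for a given ASIC variant during init.
 */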
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
};

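/* Offsets of the RLC_SRM_INDEX_CNTL_ADDR/_DATA register pairs relative
 * to index 0, so save-restore-machine setup can address them by index.
 */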
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

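/* Expected GB_ADDR_CONFIG golden values per ASIC, used as reference when
 * the address configuration is programmed or validated.
 */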
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);

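/* Apply the per-ASIC golden register sequences defined above, then the
 * settings common to all GFX9 parts (Arcturus excepted).
 */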
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_ARCTURUS:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	default:
		break;
	}

	if (adev->asic_type != CHIP_ARCTURUS)
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

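/* Emit a PACKET3_WRITE_DATA that writes @val to register @reg from the
 * selected micro engine, optionally requesting write confirmation.
 */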
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

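/* Emit a PACKET3_WAIT_REG_MEM that polls a register or memory location
 * until (value & mask) == ref, rechecking every @inv clocks.
 */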
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

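/* Basic ring test: write 0xDEADBEEF to a scratch register through the
 * ring and poll until the value lands or the timeout expires.
 */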
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

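/* IB test: same idea as the ring test, but the magic value is written to
 * write-back memory via an indirect buffer, exercising the full
 * submission path including fences.
 */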
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

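/* Parse the v2.1 RLC firmware header extensions: versions, sizes and
 * payload offsets of the three save/restore lists (CNTL, GPM, SRM).
 */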
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

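/* Record whether the CP ME/PFP and MEC firmware are recent enough to
 * support combined register write-then-wait packets, per the minimum
 * versions below (a per-ASIC table maintained by hand).
 */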
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		break;
	}
}

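/* GFXOFF is left enabled on Raven2 and Picasso; on original Raven it is
 * disabled unless the RLC firmware and its save/restore lists are new
 * enough to be stable.
 */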
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
			break;
		if ((adev->gfx.rlc_fw_version != 106 &&
		     adev->gfx.rlc_fw_version < 531) ||
		    (adev->gfx.rlc_fw_version == 53815) ||
		    (adev->gfx.rlc_feature_version < 1) ||
		    !adev->gfx.rlc.is_rlc_v2_1)
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
		break;
	default:
		break;
	}
}

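/* Fetch and validate the CP graphics firmware (PFP, ME, CE) and, when
 * PSP-based loading is used, register each image in the ucode table.
 */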
static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
	}
	return err;
}

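/* Fetch the RLC firmware, honouring the Picasso-AM4 and Raven "kicker"
 * naming quirks below, parse its register-list tables, and register the
 * image plus any v2.1 save/restore lists for PSP loading.
 */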
static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
				       const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	/*
	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 * or revision >= 0xD8 && revision <= 0xDF
	 * otherwise is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		(smu_version >= 0x41e2b))
		/*
		 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);

	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
	}
	return err;
}

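/* Fetch the compute firmware: MEC is mandatory, MEC2 is optional (some
 * parts ship without it), each split into core ucode plus a jump table
 * (JT) for loading purposes.
 */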
static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
					      const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
		le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
		le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

			/* TODO: Determine if MEC2 JT FW loading can be removed
			   for all GFX V9 asic and above */
			if (adev->asic_type != CHIP_ARCTURUS) {
				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
				info->fw = adev->gfx.mec2_fw;
				adev->firmware.fw_size +=
					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
					PAGE_SIZE);
			}
		}
	}

out:
	gfx_v9_0_check_if_need_gfxoff(adev);
	gfx_v9_0_check_fw_write_wait(adev);
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

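/* Top-level firmware init: pick the chip name, then load CP gfx (skipped
 * on Arcturus, which has no graphics pipe), RLC and compute microcode.
 */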
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	int r;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_VEGA20:
		chip_name = "vega20";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	default:
		BUG();
	}

	/* No CPG in Arcturus */
	if (adev->asic_type != CHIP_ARCTURUS) {
		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
		if (r)
			return r;
	}

	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
	if (r)
		return r;

	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
	if (r)
		return r;

	return 0;
}

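/* Size, in dwords, of the clear-state buffer built by
 * gfx_v9_0_get_csb_buffer(): preamble, context control, one
 * SET_CONTEXT_REG burst per extent, and the trailer.
 */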
static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

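/* Fill the clear-state buffer with PM4 packets that reset every
 * SECT_CONTEXT register extent to its default value.
 */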
static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

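/* Compute the per-SE/SH masks of compute units that stay powered on
 * under CU power gating, and program them into the RLC.
 */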
1437 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1439 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1440 uint32_t pg_always_on_cu_num = 2;
1441 uint32_t always_on_cu_num;
1443 uint32_t mask, cu_bitmap, counter;
1445 if (adev->flags & AMD_IS_APU)
1446 always_on_cu_num = 4;
1447 else if (adev->asic_type == CHIP_VEGA12)
1448 always_on_cu_num = 8;
1450 always_on_cu_num = 12;
1452 mutex_lock(&adev->grbm_idx_mutex);
1453 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1454 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1458 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1460 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1461 if (cu_info->bitmap[i][j] & mask) {
1462 if (counter == pg_always_on_cu_num)
1463 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1464 if (counter < always_on_cu_num)
1473 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1474 cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1477 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1478 mutex_unlock(&adev->grbm_idx_mutex);
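/*
 * The partially shown loop above walks cu_info->bitmap[i][j] one CU at
 * a time, accumulating active CUs into cu_bitmap until the always-on
 * budget is reached.  A standalone sketch of that scan (illustrative,
 * not the exact driver code):
 *
 *	u32 mask = 1, cu_bitmap = 0, counter = 0;
 *	for (k = 0; k < max_cu_per_sh; k++) {
 *		if (active_cus & mask) {
 *			if (counter < always_on_cu_num)
 *				cu_bitmap |= mask;
 *			counter++;
 *		}
 *		mask <<= 1;
 *	}
 */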
1481 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1485 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1486 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1487 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1488 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1489 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1491 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1492 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1494 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1495 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1497 mutex_lock(&adev->grbm_idx_mutex);
1498 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1499 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1500 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1502 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1503 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1504 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1505 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1506 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1508 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1509 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1512 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1515 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1516 * programmed in gfx_v9_0_init_always_on_cu_mask()
1519 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1520 * but is used for RLC_LB_CNTL configuration */
1521 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1522 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1523 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1524 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1525 mutex_unlock(&adev->grbm_idx_mutex);
1527 gfx_v9_0_init_always_on_cu_mask(adev);
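/*
 * All the register values above are composed with REG_SET_FIELD, which
 * shifts a value into a named field of a register.  In simplified form
 * (the real macro, REG_SET_FIELD in amdgpu.h, also masks the field
 * value):
 *
 *	#define REG_SET_FIELD(orig, reg, field, val)		\
 *		(((orig) & ~reg##__##field##_MASK) |		\
 *		 ((val) << reg##__##field##__SHIFT))
 *
 * so the three RLC_LB_PARAMS fields combine into the single value noted
 * in the comment above.
 */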
1530 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1534 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1535 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1536 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1537 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1538 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1540 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1541 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1543 /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1544 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1546 mutex_lock(&adev->grbm_idx_mutex);
1547 /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1548 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1549 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1551 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1552 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1553 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1554 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1555 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1557 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1558 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1561 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1564 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1565 * programmed in gfx_v9_0_init_always_on_cu_mask()
1568 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1569 * but is used for RLC_LB_CNTL configuration */
1570 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1571 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1572 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1573 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1574 mutex_unlock(&adev->grbm_idx_mutex);
1576 gfx_v9_0_init_always_on_cu_mask(adev);
1579 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1581 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1584 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1589 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1591 const struct cs_section_def *cs_data;
1594 adev->gfx.rlc.cs_data = gfx9_cs_data;
1596 cs_data = adev->gfx.rlc.cs_data;
1599 /* init clear state block */
1600 r = amdgpu_gfx_rlc_init_csb(adev);
1605 if (adev->asic_type == CHIP_RAVEN) {
1606 /* TODO: double check the cp_table_size for RV */
1607 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1608 r = amdgpu_gfx_rlc_init_cpt(adev);
1613 switch (adev->asic_type) {
1615 gfx_v9_0_init_lbpw(adev);
1618 gfx_v9_4_init_lbpw(adev);
1627 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1631 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1632 if (unlikely(r != 0))
1635 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1636 AMDGPU_GEM_DOMAIN_VRAM);
1638 adev->gfx.rlc.clear_state_gpu_addr =
1639 amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1641 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1646 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1650 if (!adev->gfx.rlc.clear_state_obj)
1653 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1654 if (likely(r == 0)) {
1655 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1656 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1660 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1662 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1663 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1666 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1670 const __le32 *fw_data;
1673 size_t mec_hpd_size;
1675 const struct gfx_firmware_header_v1_0 *mec_hdr;
1677 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1679 /* take ownership of the relevant compute queues */
1680 amdgpu_gfx_compute_queue_acquire(adev);
1681 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1683 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1684 AMDGPU_GEM_DOMAIN_VRAM,
1685 &adev->gfx.mec.hpd_eop_obj,
1686 &adev->gfx.mec.hpd_eop_gpu_addr,
1689 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1690 gfx_v9_0_mec_fini(adev);
1694 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1696 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1697 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1699 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1701 fw_data = (const __le32 *)
1702 (adev->gfx.mec_fw->data +
1703 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1704 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); /* bytes, memcpy'd below */
1706 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1707 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1708 &adev->gfx.mec.mec_fw_obj,
1709 &adev->gfx.mec.mec_fw_gpu_addr,
1712 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1713 gfx_v9_0_mec_fini(adev);
1717 memcpy(fw, fw_data, fw_size);
1719 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1720 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1725 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1727 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1728 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1729 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1730 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1731 (SQ_IND_INDEX__FORCE_READ_MASK));
1732 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1735 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1736 uint32_t wave, uint32_t thread,
1737 uint32_t regno, uint32_t num, uint32_t *out)
1739 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1740 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1741 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1742 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1743 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1744 (SQ_IND_INDEX__FORCE_READ_MASK) |
1745 (SQ_IND_INDEX__AUTO_INCR_MASK));
1747 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
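/*
 * Wave state reads go through the SQ_IND_INDEX/SQ_IND_DATA indirect
 * pair; with AUTO_INCR set, every SQ_IND_DATA read advances the index,
 * so a burst of N registers costs one index write plus N data reads.
 * Sketch of how the SGPR/VGPR helpers below pick their start offset
 * (names as used in this file):
 *
 *	sgpr_reg = start + SQIND_WAVE_SGPRS_OFFSET;   <- per-wave SGPR file
 *	vgpr_reg = start + SQIND_WAVE_VGPRS_OFFSET;   <- per-thread VGPRs
 */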
1750 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1752 /* type 1 wave data */
1753 dst[(*no_fields)++] = 1;
1754 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1755 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1756 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1757 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1758 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1759 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1760 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1761 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1762 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1763 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1764 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1765 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1766 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1767 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1770 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1771 uint32_t wave, uint32_t start,
1772 uint32_t size, uint32_t *dst)
1775 adev, simd, wave, 0,
1776 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1779 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1780 uint32_t wave, uint32_t thread,
1781 uint32_t start, uint32_t size,
1785 adev, simd, wave, thread,
1786 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1789 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1790 u32 me, u32 pipe, u32 q, u32 vm)
1792 soc15_grbm_select(adev, me, pipe, q, vm);
1795 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1796 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1797 .select_se_sh = &gfx_v9_0_select_se_sh,
1798 .read_wave_data = &gfx_v9_0_read_wave_data,
1799 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1800 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1801 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1802 .ras_error_inject = &gfx_v9_0_ras_error_inject,
1803 .query_ras_error_count = &gfx_v9_0_query_ras_error_count
1806 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1811 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1813 switch (adev->asic_type) {
1815 adev->gfx.config.max_hw_contexts = 8;
1816 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1817 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1818 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1819 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1820 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1823 adev->gfx.config.max_hw_contexts = 8;
1824 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1825 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1826 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1827 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1828 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1829 DRM_INFO("fix gfx.config for vega12\n");
1832 adev->gfx.config.max_hw_contexts = 8;
1833 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1834 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1835 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1836 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1837 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1838 gb_addr_config &= ~0xf3e777ff;
1839 gb_addr_config |= 0x22014042;
1840 /* check vbios table if gpu info is not available */
1841 err = amdgpu_atomfirmware_get_gfx_info(adev);
1846 adev->gfx.config.max_hw_contexts = 8;
1847 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1848 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1849 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1850 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1851 if (adev->rev_id >= 8)
1852 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1854 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1857 adev->gfx.config.max_hw_contexts = 8;
1858 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1859 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1860 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1861 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1862 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1863 gb_addr_config &= ~0xf3e777ff;
1864 gb_addr_config |= 0x22014042;
1871 adev->gfx.config.gb_addr_config = gb_addr_config;
1873 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1875 adev->gfx.config.gb_addr_config,
1879 adev->gfx.config.max_tile_pipes =
1880 adev->gfx.config.gb_addr_config_fields.num_pipes;
1882 adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1884 adev->gfx.config.gb_addr_config,
1887 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1889 adev->gfx.config.gb_addr_config,
1891 MAX_COMPRESSED_FRAGS);
1892 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1894 adev->gfx.config.gb_addr_config,
1897 adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1899 adev->gfx.config.gb_addr_config,
1901 NUM_SHADER_ENGINES);
1902 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1904 adev->gfx.config.gb_addr_config,
1906 PIPE_INTERLEAVE_SIZE));
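/*
 * All gb_addr_config_fields above follow the same pattern: pull a field
 * out of GB_ADDR_CONFIG with REG_GET_FIELD and treat it as a log2
 * encoded count.  Illustrative decode for two of the fields:
 *
 *	num_pipes = 1 << REG_GET_FIELD(gb_addr_config,
 *				       GB_ADDR_CONFIG, NUM_PIPES);
 *	pipe_interleave_size =
 *		1 << (8 + REG_GET_FIELD(gb_addr_config,
 *					GB_ADDR_CONFIG,
 *					PIPE_INTERLEAVE_SIZE));
 */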
1911 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1912 struct amdgpu_ngg_buf *ngg_buf,
1914 int default_size_se)
1919 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1922 size_se = size_se ? size_se : default_size_se;
1924 ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1925 r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1926 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1931 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1934 ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1939 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1943 for (i = 0; i < NGG_BUF_MAX; i++)
1944 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1945 &adev->gfx.ngg.buf[i].gpu_addr,
1948 memset(&adev->gfx.ngg.buf[0], 0,
1949 sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1951 adev->gfx.ngg.init = false;
1956 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1960 if (!amdgpu_ngg || adev->gfx.ngg.init)
1963 /* GDS reserve memory: 64 bytes alignment */
1964 adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1965 adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1966 adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1967 adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1969 /* Primitive Buffer */
1970 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1971 amdgpu_prim_buf_per_se,
1974 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1978 /* Position Buffer */
1979 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1980 amdgpu_pos_buf_per_se,
1983 dev_err(adev->dev, "Failed to create Position Buffer\n");
1987 /* Control Sideband */
1988 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1989 amdgpu_cntl_sb_buf_per_se,
1992 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1996 /* Parameter Cache, not created by default */
1997 if (amdgpu_param_buf_per_se <= 0)
2000 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
2001 amdgpu_param_buf_per_se,
2004 dev_err(adev->dev, "Failed to create Parameter Cache\n");
2009 adev->gfx.ngg.init = true;
2012 gfx_v9_0_ngg_fini(adev);
2016 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
2018 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2025 /* Program buffer size */
2026 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
2027 adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
2028 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
2029 adev->gfx.ngg.buf[NGG_POS].size >> 8);
2030 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
2032 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
2033 adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
2034 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
2035 adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
2036 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
2038 /* Program buffer base address */
2039 base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2040 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
2041 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
2043 base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2044 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
2045 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
2047 base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2048 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
2049 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
2051 base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2052 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
2053 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
2055 base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2056 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
2057 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
2059 base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2060 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
2061 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
2063 /* Clear GDS reserved memory */
2064 r = amdgpu_ring_alloc(ring, 17);
2066 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
2071 gfx_v9_0_write_data_to_reg(ring, 0, false,
2072 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
2073 (adev->gds.gds_size +
2074 adev->gfx.ngg.gds_reserve_size));
2076 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
2077 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
2078 PACKET3_DMA_DATA_DST_SEL(1) |
2079 PACKET3_DMA_DATA_SRC_SEL(2)));
2080 amdgpu_ring_write(ring, 0);
2081 amdgpu_ring_write(ring, 0);
2082 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
2083 amdgpu_ring_write(ring, 0);
2084 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
2085 adev->gfx.ngg.gds_reserve_size);
2087 gfx_v9_0_write_data_to_reg(ring, 0, false,
2088 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
2090 amdgpu_ring_commit(ring);
2095 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2096 int mec, int pipe, int queue)
2100 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2107 ring->queue = queue;
2109 ring->ring_obj = NULL;
2110 ring->use_doorbell = true;
2111 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2112 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2113 + (ring_id * GFX9_MEC_HPD_SIZE);
2114 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2116 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2117 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2120 /* type-2 packets are deprecated on MEC, use type-3 instead */
2121 r = amdgpu_ring_init(adev, ring, 1024,
2122 &adev->gfx.eop_irq, irq_type);
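/*
 * Per-ring resources follow directly from ring_id: vega doorbells are
 * 64-bit slots, hence the << 1 on the index, and each ring owns one
 * GFX9_MEC_HPD_SIZE slice of the shared EOP buffer.  Schematically
 * (local names hypothetical):
 *
 *	doorbell = (mec_ring0_base + ring_id) << 1;
 *	eop_addr = hpd_eop_base + ring_id * GFX9_MEC_HPD_SIZE;
 */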
2130 static int gfx_v9_0_sw_init(void *handle)
2132 int i, j, k, r, ring_id;
2133 struct amdgpu_ring *ring;
2134 struct amdgpu_kiq *kiq;
2135 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2137 switch (adev->asic_type) {
2143 adev->gfx.mec.num_mec = 2;
2146 adev->gfx.mec.num_mec = 1;
2150 adev->gfx.mec.num_pipe_per_mec = 4;
2151 adev->gfx.mec.num_queue_per_pipe = 8;
2154 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2158 /* Privileged reg */
2159 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2160 &adev->gfx.priv_reg_irq);
2164 /* Privileged inst */
2165 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2166 &adev->gfx.priv_inst_irq);
2171 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2172 &adev->gfx.cp_ecc_error_irq);
2177 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2178 &adev->gfx.cp_ecc_error_irq);
2182 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2184 gfx_v9_0_scratch_init(adev);
2186 r = gfx_v9_0_init_microcode(adev);
2188 DRM_ERROR("Failed to load gfx firmware!\n");
2192 r = adev->gfx.rlc.funcs->init(adev);
2194 DRM_ERROR("Failed to init rlc BOs!\n");
2198 r = gfx_v9_0_mec_init(adev);
2200 DRM_ERROR("Failed to init MEC BOs!\n");
2204 /* set up the gfx ring */
2205 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2206 ring = &adev->gfx.gfx_ring[i];
2207 ring->ring_obj = NULL;
2209 sprintf(ring->name, "gfx");
2211 sprintf(ring->name, "gfx_%d", i);
2212 ring->use_doorbell = true;
2213 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2214 r = amdgpu_ring_init(adev, ring, 1024,
2215 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2220 /* set up the compute queues - allocate horizontally across pipes */
2222 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2223 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2224 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2225 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2228 r = gfx_v9_0_compute_ring_init(adev,
2239 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2241 DRM_ERROR("Failed to init KIQ BOs!\n");
2245 kiq = &adev->gfx.kiq;
2246 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2250 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2251 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2255 adev->gfx.ce_ram_size = 0x8000;
2257 r = gfx_v9_0_gpu_early_init(adev);
2261 r = gfx_v9_0_ngg_init(adev);
2269 static int gfx_v9_0_sw_fini(void *handle)
2272 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2274 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
2276 struct ras_common_if *ras_if = adev->gfx.ras_if;
2277 struct ras_ih_if ih_info = {
2281 amdgpu_ras_debugfs_remove(adev, ras_if);
2282 amdgpu_ras_sysfs_remove(adev, ras_if);
2283 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
2284 amdgpu_ras_feature_enable(adev, ras_if, 0);
2288 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2289 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2290 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2291 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2293 amdgpu_gfx_mqd_sw_fini(adev);
2294 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2295 amdgpu_gfx_kiq_fini(adev);
2297 gfx_v9_0_mec_fini(adev);
2298 gfx_v9_0_ngg_fini(adev);
2299 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2300 if (adev->asic_type == CHIP_RAVEN) {
2301 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2302 &adev->gfx.rlc.cp_table_gpu_addr,
2303 (void **)&adev->gfx.rlc.cp_table_ptr);
2305 gfx_v9_0_free_microcode(adev);
2311 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2316 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2320 if (instance == 0xffffffff)
2321 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2323 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2325 if (se_num == 0xffffffff)
2326 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2328 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2330 if (sh_num == 0xffffffff)
2331 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2333 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2335 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2338 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2342 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2343 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2345 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2346 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2348 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2349 adev->gfx.config.max_sh_per_se);
2351 return (~data) & mask;
2354 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2359 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2360 adev->gfx.config.max_sh_per_se;
2362 mutex_lock(&adev->grbm_idx_mutex);
2363 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2364 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2365 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2366 data = gfx_v9_0_get_rb_active_bitmap(adev);
2367 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2368 rb_bitmap_width_per_sh);
2371 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2372 mutex_unlock(&adev->grbm_idx_mutex);
2374 adev->gfx.config.backend_enable_mask = active_rbs;
2375 adev->gfx.config.num_rbs = hweight32(active_rbs);
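/*
 * The loop above packs each (se, sh) backend bitmap into one global
 * mask, rb_bitmap_width_per_sh bits per shader array, and num_rbs is
 * simply the popcount.  Example with 4 SEs, 1 SH/SE and 4 RBs per SH
 * (hypothetical configuration):
 *
 *	active_rbs = 0xf | (0xf << 4) | (0xf << 8) | (0xf << 12);
 *	num_rbs    = hweight32(active_rbs);   <- 16
 */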
2378 #define DEFAULT_SH_MEM_BASES (0x6000)
2379 #define FIRST_COMPUTE_VMID (8)
2380 #define LAST_COMPUTE_VMID (16)
2381 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2384 uint32_t sh_mem_config;
2385 uint32_t sh_mem_bases;
2388 * Configure apertures:
2389 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
2390 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
2391 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
2393 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2395 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2396 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2397 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2399 mutex_lock(&adev->srbm_mutex);
2400 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2401 soc15_grbm_select(adev, 0, 0, 0, i);
2402 /* CP and shaders */
2403 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2404 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2406 soc15_grbm_select(adev, 0, 0, 0, 0);
2407 mutex_unlock(&adev->srbm_mutex);
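/*
 * DEFAULT_SH_MEM_BASES places one 16-bit aperture base (in units of
 * 2^48 bytes) in each half of SH_MEM_BASES, which is what produces the
 * 0x6000... apertures in the comment above:
 *
 *	sh_mem_bases = DEFAULT_SH_MEM_BASES |
 *		       (DEFAULT_SH_MEM_BASES << 16);   <- 0x60006000
 *
 * i.e. both the private and the shared aperture start at 0x6000 << 48.
 */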
2410 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2415 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2416 * access. Compute VMIDs should be enabled by FW for target VMIDs;
2417 * the driver can enable them for graphics. VMID0 should maintain
2418 * access so that HWS firmware can save/restore entries.
2420 for (vmid = 1; vmid < 16; vmid++) {
2421 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2422 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2423 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2424 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2428 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2433 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2435 gfx_v9_0_tiling_mode_table_init(adev);
2437 gfx_v9_0_setup_rb(adev);
2438 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2439 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2441 /* XXX SH_MEM regs */
2442 /* where to put LDS, scratch, GPUVM in FSA64 space */
2443 mutex_lock(&adev->srbm_mutex);
2444 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2445 soc15_grbm_select(adev, 0, 0, 0, i);
2446 /* CP and shaders */
2448 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2449 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2450 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2452 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2453 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2455 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2456 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2457 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2459 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2460 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2461 (adev->gmc.private_aperture_start >> 48));
2462 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2463 (adev->gmc.shared_aperture_start >> 48));
2464 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2467 soc15_grbm_select(adev, 0, 0, 0, 0);
2469 mutex_unlock(&adev->srbm_mutex);
2471 gfx_v9_0_init_compute_vmid(adev);
2472 gfx_v9_0_init_gds_vmid(adev);
2475 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2480 mutex_lock(&adev->grbm_idx_mutex);
2481 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2482 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2483 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2484 for (k = 0; k < adev->usec_timeout; k++) {
2485 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2489 if (k == adev->usec_timeout) {
2490 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2491 0xffffffff, 0xffffffff);
2492 mutex_unlock(&adev->grbm_idx_mutex);
2493 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2499 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2500 mutex_unlock(&adev->grbm_idx_mutex);
2502 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2503 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2504 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2505 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2506 for (k = 0; k < adev->usec_timeout; k++) {
2507 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2513 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2516 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2518 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2519 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2520 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2521 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2523 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2526 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2529 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2530 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2531 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2532 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2533 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2534 adev->gfx.rlc.clear_state_size);
2537 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2538 int indirect_offset,
2540 int *unique_indirect_regs,
2541 int unique_indirect_reg_count,
2542 int *indirect_start_offsets,
2543 int *indirect_start_offsets_count,
2544 int max_start_offsets_count)
2548 for (; indirect_offset < list_size; indirect_offset++) {
2549 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2550 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2551 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2553 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2554 indirect_offset += 2;
2556 /* look for the matching index */
2557 for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2558 if (unique_indirect_regs[idx] ==
2559 register_list_format[indirect_offset] ||
2560 !unique_indirect_regs[idx])
2564 BUG_ON(idx >= unique_indirect_reg_count);
2566 if (!unique_indirect_regs[idx])
2567 unique_indirect_regs[idx] = register_list_format[indirect_offset];
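/*
 * The register_list_format blob parsed above is a sequence of indirect
 * blocks, each a run of (register, value) pairs closed by a 0xFFFFFFFF
 * sentinel; the parser records each block's start offset and collects
 * the distinct indirect registers.  Schematic layout (illustrative):
 *
 *	[reg][val][reg][val] ... [0xFFFFFFFF]   <- block 0
 *	[reg][val] ...           [0xFFFFFFFF]   <- block 1
 */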
2574 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2576 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2577 int unique_indirect_reg_count = 0;
2579 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2580 int indirect_start_offsets_count = 0;
2586 u32 *register_list_format =
2587 kmemdup(adev->gfx.rlc.register_list_format,
2588 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2589 if (!register_list_format)
2592 /* setup unique_indirect_regs array and indirect_start_offsets array */
2593 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2594 gfx_v9_1_parse_ind_reg_list(register_list_format,
2595 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2596 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2597 unique_indirect_regs,
2598 unique_indirect_reg_count,
2599 indirect_start_offsets,
2600 &indirect_start_offsets_count,
2601 ARRAY_SIZE(indirect_start_offsets));
2603 /* enable auto inc in case it is disabled */
2604 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2605 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2606 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2608 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2609 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2610 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2611 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2612 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2613 adev->gfx.rlc.register_restore[i]);
2615 /* load indirect register */
2616 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2617 adev->gfx.rlc.reg_list_format_start);
2619 /* direct register portion */
2620 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2621 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2622 register_list_format[i]);
2624 /* indirect register portion */
2625 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2626 if (register_list_format[i] == 0xFFFFFFFF) {
2627 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2631 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2632 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2634 for (j = 0; j < unique_indirect_reg_count; j++) {
2635 if (register_list_format[i] == unique_indirect_regs[j]) {
2636 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2641 BUG_ON(j >= unique_indirect_reg_count);
2646 /* set save/restore list size */
2647 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2648 list_size = list_size >> 1;
2649 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2650 adev->gfx.rlc.reg_restore_list_size);
2651 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2653 /* write the starting offsets to RLC scratch ram */
2654 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2655 adev->gfx.rlc.starting_offsets_start);
2656 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2657 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2658 indirect_start_offsets[i]);
2660 /* load unique indirect regs */
2661 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2662 if (unique_indirect_regs[i] != 0) {
2663 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2664 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2665 unique_indirect_regs[i] & 0x3FFFF);
2667 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2668 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2669 unique_indirect_regs[i] >> 20);
2673 kfree(register_list_format);
2677 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2679 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2682 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2686 uint32_t default_data = 0;
2688 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2689 if (enable) {
2690 /* enable GFXIP control over CGPG */
2691 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2692 if (default_data != data)
2693 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2696 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2697 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2698 if (default_data != data)
2699 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2701 /* restore GFXIP control over CGPG */
2702 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2703 if (default_data != data)
2704 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2708 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2712 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2713 AMD_PG_SUPPORT_GFX_SMG |
2714 AMD_PG_SUPPORT_GFX_DMG)) {
2715 /* init IDLE_POLL_COUNT = 60 */
2716 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2717 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2718 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2719 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2721 /* init RLC PG Delay */
2723 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2724 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2725 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2726 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2727 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2729 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2730 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2731 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2732 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2734 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2735 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2736 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2737 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2739 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2740 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2742 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2743 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2744 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2746 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2750 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2754 uint32_t default_data = 0;
2756 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2757 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2758 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2760 if (default_data != data)
2761 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2764 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2768 uint32_t default_data = 0;
2770 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2771 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2772 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2774 if (default_data != data)
2775 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2778 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2782 uint32_t default_data = 0;
2784 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2785 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2788 if (default_data != data)
2789 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2792 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2795 uint32_t data, default_data;
2797 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2798 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2799 GFX_POWER_GATING_ENABLE,
2801 if (default_data != data)
2802 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2805 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2808 uint32_t data, default_data;
2810 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2811 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2812 GFX_PIPELINE_PG_ENABLE,
2814 if (default_data != data)
2815 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2818 /* read any GFX register to wake up GFX */
2819 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2822 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2825 uint32_t data, default_data;
2827 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2828 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2829 STATIC_PER_CU_PG_ENABLE,
2831 if (default_data != data)
2832 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2835 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2838 uint32_t data, default_data;
2840 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2841 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2842 DYN_PER_CU_PG_ENABLE,
2844 if (default_data != data)
2845 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2848 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2850 gfx_v9_0_init_csb(adev);
2853 * The RLC save/restore list has been usable since v2_1,
2854 * and it is needed by the gfxoff feature.
2856 if (adev->gfx.rlc.is_rlc_v2_1) {
2857 gfx_v9_1_init_rlc_save_restore_list(adev);
2858 gfx_v9_0_enable_save_restore_machine(adev);
2861 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2862 AMD_PG_SUPPORT_GFX_SMG |
2863 AMD_PG_SUPPORT_GFX_DMG |
2865 AMD_PG_SUPPORT_GDS |
2866 AMD_PG_SUPPORT_RLC_SMU_HS)) {
2867 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2868 adev->gfx.rlc.cp_table_gpu_addr >> 8);
2869 gfx_v9_0_init_gfx_power_gating(adev);
2873 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2875 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2876 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2877 gfx_v9_0_wait_for_rlc_serdes(adev);
2880 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2882 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2884 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2888 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2890 #ifdef AMDGPU_RLC_DEBUG_RETRY
2894 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2897 /* APUs such as Carrizo enable the CP interrupt only after the CP is initialized */
2898 if (!(adev->flags & AMD_IS_APU)) {
2899 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2903 #ifdef AMDGPU_RLC_DEBUG_RETRY
2904 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2905 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2906 if (rlc_ucode_ver == 0x108) {
2907 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2908 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2909 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2910 * default is 0x9C4 to create a 100us interval */
2911 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2912 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2913 * to disable the page fault retry interrupts, default is 0x100 */
2915 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2920 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2922 const struct rlc_firmware_header_v2_0 *hdr;
2923 const __le32 *fw_data;
2924 unsigned i, fw_size;
2926 if (!adev->gfx.rlc_fw)
2929 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2930 amdgpu_ucode_print_rlc_hdr(&hdr->header);
2932 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2933 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2934 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2936 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2937 RLCG_UCODE_LOADING_START_ADDRESS);
2938 for (i = 0; i < fw_size; i++)
2939 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2940 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2945 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2949 if (amdgpu_sriov_vf(adev)) {
2950 gfx_v9_0_init_csb(adev);
2954 adev->gfx.rlc.funcs->stop(adev);
2957 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2959 gfx_v9_0_init_pg(adev);
2961 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2962 /* legacy rlc firmware loading */
2963 r = gfx_v9_0_rlc_load_microcode(adev);
2968 switch (adev->asic_type) {
2970 if (amdgpu_lbpw == 0)
2971 gfx_v9_0_enable_lbpw(adev, false);
2973 gfx_v9_0_enable_lbpw(adev, true);
2976 if (amdgpu_lbpw > 0)
2977 gfx_v9_0_enable_lbpw(adev, true);
2979 gfx_v9_0_enable_lbpw(adev, false);
2985 adev->gfx.rlc.funcs->start(adev);
2990 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2993 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2995 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2996 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2997 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2999 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3000 adev->gfx.gfx_ring[i].sched.ready = false;
3002 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3006 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3008 const struct gfx_firmware_header_v1_0 *pfp_hdr;
3009 const struct gfx_firmware_header_v1_0 *ce_hdr;
3010 const struct gfx_firmware_header_v1_0 *me_hdr;
3011 const __le32 *fw_data;
3012 unsigned i, fw_size;
3014 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3017 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3018 adev->gfx.pfp_fw->data;
3019 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3020 adev->gfx.ce_fw->data;
3021 me_hdr = (const struct gfx_firmware_header_v1_0 *)
3022 adev->gfx.me_fw->data;
3024 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3025 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3026 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3028 gfx_v9_0_cp_gfx_enable(adev, false);
3031 fw_data = (const __le32 *)
3032 (adev->gfx.pfp_fw->data +
3033 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3034 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3035 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3036 for (i = 0; i < fw_size; i++)
3037 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3038 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3041 fw_data = (const __le32 *)
3042 (adev->gfx.ce_fw->data +
3043 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3044 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3045 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3046 for (i = 0; i < fw_size; i++)
3047 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3048 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3051 fw_data = (const __le32 *)
3052 (adev->gfx.me_fw->data +
3053 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3054 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3055 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3056 for (i = 0; i < fw_size; i++)
3057 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3058 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3063 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3065 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3066 const struct cs_section_def *sect = NULL;
3067 const struct cs_extent_def *ext = NULL;
3071 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3072 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3074 gfx_v9_0_cp_gfx_enable(adev, true);
3076 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3078 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3082 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3083 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3085 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3086 amdgpu_ring_write(ring, 0x80000000);
3087 amdgpu_ring_write(ring, 0x80000000);
3089 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3090 for (ext = sect->section; ext->extent != NULL; ++ext) {
3091 if (sect->id == SECT_CONTEXT) {
3092 amdgpu_ring_write(ring,
3093 PACKET3(PACKET3_SET_CONTEXT_REG,
3095 amdgpu_ring_write(ring,
3096 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3097 for (i = 0; i < ext->reg_count; i++)
3098 amdgpu_ring_write(ring, ext->extent[i]);
3103 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3104 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3106 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3107 amdgpu_ring_write(ring, 0);
3109 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3110 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3111 amdgpu_ring_write(ring, 0x8000);
3112 amdgpu_ring_write(ring, 0x8000);
3114 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3115 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3116 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3117 amdgpu_ring_write(ring, tmp);
3118 amdgpu_ring_write(ring, 0);
3120 amdgpu_ring_commit(ring);
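/*
 * The ring_alloc budget above (csb_size + 4 + 3) is exact: PACKET3
 * payload counts are dwords-minus-one, so the stream costs
 *
 *	size = csb_size  <- clear state stream from get_csb_size()
 *	     + 4         <- SET_BASE: header + base index + 2 addr dwords
 *	     + 3;        <- SET_UCONFIG_REG: header + reg offset + value
 */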
3125 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3127 struct amdgpu_ring *ring;
3130 u64 rb_addr, rptr_addr, wptr_gpu_addr;
3132 /* Set the write pointer delay */
3133 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3135 /* set the RB to use vmid 0 */
3136 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3138 /* Set ring buffer size */
3139 ring = &adev->gfx.gfx_ring[0];
3140 rb_bufsz = order_base_2(ring->ring_size / 8);
3141 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3142 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3144 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3146 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3148 /* Initialize the ring buffer's write pointers */
3150 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3151 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3153 /* set the wb address whether it's enabled or not */
3154 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3155 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3156 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3158 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3159 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3160 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3163 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3165 rb_addr = ring->gpu_addr >> 8;
3166 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3167 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3169 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3170 if (ring->use_doorbell) {
3171 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3172 DOORBELL_OFFSET, ring->doorbell_index);
3173 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3176 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3178 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3180 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3181 DOORBELL_RANGE_LOWER, ring->doorbell_index);
3182 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3184 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3185 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3188 /* start the ring */
3189 gfx_v9_0_cp_gfx_start(adev);
3190 ring->sched.ready = true;
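/*
 * CP_RB0_CNTL encodes the ring size logarithmically: RB_BUFSZ is
 * log2(ring size in bytes / 8).  Illustrative arithmetic for a 64 KiB
 * ring buffer:
 *
 *	rb_bufsz = order_base_2(0x10000 / 8);   <- 13
 */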
3195 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3200 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3202 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3203 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3204 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3205 adev->gfx.compute_ring[i].sched.ready = false;
3206 adev->gfx.kiq.ring.sched.ready = false;
3211 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3213 const struct gfx_firmware_header_v1_0 *mec_hdr;
3214 const __le32 *fw_data;
3218 if (!adev->gfx.mec_fw)
3221 gfx_v9_0_cp_compute_enable(adev, false);
3223 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3224 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3226 fw_data = (const __le32 *)
3227 (adev->gfx.mec_fw->data +
3228 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3230 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3231 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3232 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3234 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3235 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3236 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3237 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3240 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3241 mec_hdr->jt_offset);
3242 for (i = 0; i < mec_hdr->jt_size; i++)
3243 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3244 le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3246 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3247 adev->gfx.mec_fw_version);
3248 /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3254 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3257 struct amdgpu_device *adev = ring->adev;
3259 /* tell RLC which queue is the KIQ */
3260 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3262 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3263 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3265 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3268 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3270 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3271 uint64_t queue_mask = 0;
3274 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3275 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3278 /* This situation may be hit in the future if a new HW
3279 * generation exposes more than 64 queues. If so, the
3280 * definition of queue_mask needs updating */
3281 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3282 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3286 queue_mask |= (1ull << i);
3289 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3291 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3296 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3297 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3298 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
3299 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
3300 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
3301 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
3302 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
3303 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
3304 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
3305 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3306 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3307 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3308 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3310 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3311 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
3312 amdgpu_ring_write(kiq_ring,
3313 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3314 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3315 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3316 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3317 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3318 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3319 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3320 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3321 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3322 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3323 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3324 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3325 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3326 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3329 r = amdgpu_ring_test_helper(kiq_ring);
3331 DRM_ERROR("KCQ enable failed\n");
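/*
 * The submission above budgets (7 * num_compute_rings) + 8 dwords: an
 * 8-dword SET_RESOURCES (header plus 7 payload dwords) followed by one
 * 7-dword MAP_QUEUES per ring.  The queue mask itself is one bit per
 * enabled MEC queue, equivalent to:
 *
 *	for_each_set_bit(i, adev->gfx.mec.queue_bitmap,
 *			 AMDGPU_MAX_COMPUTE_QUEUES)
 *		queue_mask |= 1ull << i;
 */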
3336 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3338 struct amdgpu_device *adev = ring->adev;
3339 struct v9_mqd *mqd = ring->mqd_ptr;
3340 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3343 mqd->header = 0xC0310800;
3344 mqd->compute_pipelinestat_enable = 0x00000001;
3345 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3346 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3347 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3348 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3349 mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3350 mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3351 mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3352 mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3353 mqd->compute_misc_reserved = 0x00000003;
3355 mqd->dynamic_cu_mask_addr_lo =
3356 lower_32_bits(ring->mqd_gpu_addr
3357 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3358 mqd->dynamic_cu_mask_addr_hi =
3359 upper_32_bits(ring->mqd_gpu_addr
3360 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3362 eop_base_addr = ring->eop_gpu_addr >> 8;
3363 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3364 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3366 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3367 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3368 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3369 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3371 mqd->cp_hqd_eop_control = tmp;
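	/*
	 * Worked example: GFX9_MEC_HPD_SIZE = 4096 bytes = 1024 dwords, so
	 * EOP_SIZE = order_base_2(1024) - 1 = 9 and the hardware sees an
	 * EOP ring of 2^(9+1) = 1024 dwords, i.e. the whole HPD buffer.
	 */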
3373 /* enable doorbell? */
3374 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3376 if (ring->use_doorbell) {
3377 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3378 DOORBELL_OFFSET, ring->doorbell_index);
3379 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3381 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3382 DOORBELL_SOURCE, 0);
3383 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3386 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3390 mqd->cp_hqd_pq_doorbell_control = tmp;
3392 /* disable the queue if it's active */
3394 mqd->cp_hqd_dequeue_request = 0;
3395 mqd->cp_hqd_pq_rptr = 0;
3396 mqd->cp_hqd_pq_wptr_lo = 0;
3397 mqd->cp_hqd_pq_wptr_hi = 0;
3399 /* set the pointer to the MQD */
3400 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3401 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3403 /* set MQD vmid to 0 */
3404 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3405 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3406 mqd->cp_mqd_control = tmp;
	/* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
3409 hqd_gpu_addr = ring->gpu_addr >> 8;
3410 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3411 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3413 /* set up the HQD, this is similar to CP_RB0_CNTL */
3414 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3415 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3416 (order_base_2(ring->ring_size / 4) - 1));
3417 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3418 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3420 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3422 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3423 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3424 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3425 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3426 mqd->cp_hqd_pq_control = tmp;
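	/*
	 * Worked example, assuming a 64KB ring: ring_size / 4 = 16384
	 * dwords, so QUEUE_SIZE = order_base_2(16384) - 1 = 13, which the
	 * hardware decodes as 2^(13+1) = 16384 dwords by the same 2^(n+1)
	 * convention noted for EOP_SIZE above.
	 */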
3428 /* set the wb address whether it's enabled or not */
3429 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3430 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3431 mqd->cp_hqd_pq_rptr_report_addr_hi =
3432 upper_32_bits(wb_gpu_addr) & 0xffff;
	/* only used if CP_PQ_WPTR_POLL_CNTL.EN = 1 */
3435 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3436 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3437 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3440 /* enable the doorbell if requested */
3441 if (ring->use_doorbell) {
3442 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3443 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3444 DOORBELL_OFFSET, ring->doorbell_index);
3446 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3448 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3449 DOORBELL_SOURCE, 0);
3450 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3454 mqd->cp_hqd_pq_doorbell_control = tmp;
3456 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3458 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3460 /* set the vmid for the queue */
3461 mqd->cp_hqd_vmid = 0;
3463 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3464 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3465 mqd->cp_hqd_persistent_state = tmp;
3467 /* set MIN_IB_AVAIL_SIZE */
3468 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3469 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3470 mqd->cp_hqd_ib_control = tmp;
3472 /* activate the queue */
3473 mqd->cp_hqd_active = 1;
3478 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3480 struct amdgpu_device *adev = ring->adev;
3481 struct v9_mqd *mqd = ring->mqd_ptr;
3484 /* disable wptr polling */
3485 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3487 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3488 mqd->cp_hqd_eop_base_addr_lo);
3489 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3490 mqd->cp_hqd_eop_base_addr_hi);
3492 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3493 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3494 mqd->cp_hqd_eop_control);
3496 /* enable doorbell? */
3497 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3498 mqd->cp_hqd_pq_doorbell_control);
3500 /* disable the queue if it's active */
3501 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3502 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3503 for (j = 0; j < adev->usec_timeout; j++) {
3504 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3508 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3509 mqd->cp_hqd_dequeue_request);
3510 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3511 mqd->cp_hqd_pq_rptr);
3512 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3513 mqd->cp_hqd_pq_wptr_lo);
3514 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3515 mqd->cp_hqd_pq_wptr_hi);
3518 /* set the pointer to the MQD */
3519 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3520 mqd->cp_mqd_base_addr_lo);
3521 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3522 mqd->cp_mqd_base_addr_hi);
3524 /* set MQD vmid to 0 */
3525 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3526 mqd->cp_mqd_control);
	/* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
3529 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3530 mqd->cp_hqd_pq_base_lo);
3531 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3532 mqd->cp_hqd_pq_base_hi);
3534 /* set up the HQD, this is similar to CP_RB0_CNTL */
3535 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3536 mqd->cp_hqd_pq_control);
3538 /* set the wb address whether it's enabled or not */
3539 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3540 mqd->cp_hqd_pq_rptr_report_addr_lo);
3541 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3542 mqd->cp_hqd_pq_rptr_report_addr_hi);
	/* only used if CP_PQ_WPTR_POLL_CNTL.EN = 1 */
3545 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3546 mqd->cp_hqd_pq_wptr_poll_addr_lo);
3547 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3548 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3550 /* enable the doorbell if requested */
3551 if (ring->use_doorbell) {
3552 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3553 (adev->doorbell_index.kiq * 2) << 2);
3554 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3555 (adev->doorbell_index.userqueue_end * 2) << 2);
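		/*
		 * Note: the range registers appear to take byte offsets;
		 * each doorbell index addresses a 64-bit slot, so
		 * (index * 2) << 2 is index * 8 bytes, spanning from the
		 * KIQ doorbell to the end of the user-queue doorbells.
		 */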
3558 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3559 mqd->cp_hqd_pq_doorbell_control);
3561 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3562 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3563 mqd->cp_hqd_pq_wptr_lo);
3564 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3565 mqd->cp_hqd_pq_wptr_hi);
3567 /* set the vmid for the queue */
3568 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3570 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3571 mqd->cp_hqd_persistent_state);
3573 /* activate the queue */
3574 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3575 mqd->cp_hqd_active);
3577 if (ring->use_doorbell)
3578 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3583 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3585 struct amdgpu_device *adev = ring->adev;
3588 /* disable the queue if it's active */
3589 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3591 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3593 for (j = 0; j < adev->usec_timeout; j++) {
3594 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
	if (j == adev->usec_timeout) {
3600 DRM_DEBUG("KIQ dequeue request failed.\n");
3602 /* Manual disable if dequeue request times out */
3603 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3606 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3610 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3611 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3612 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3613 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3614 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3615 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3616 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3617 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3622 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3624 struct amdgpu_device *adev = ring->adev;
3625 struct v9_mqd *mqd = ring->mqd_ptr;
3626 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3628 gfx_v9_0_kiq_setting(ring);
3630 if (adev->in_gpu_reset) { /* for GPU_RESET case */
3631 /* reset MQD to a clean status */
3632 if (adev->gfx.mec.mqd_backup[mqd_idx])
3633 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
	/* reset ring buffer */
	ring->wptr = 0;
	amdgpu_ring_clear_ring(ring);
3639 mutex_lock(&adev->srbm_mutex);
3640 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3641 gfx_v9_0_kiq_init_register(ring);
3642 soc15_grbm_select(adev, 0, 0, 0, 0);
3643 mutex_unlock(&adev->srbm_mutex);
3645 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3646 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3647 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3648 mutex_lock(&adev->srbm_mutex);
3649 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3650 gfx_v9_0_mqd_init(ring);
3651 gfx_v9_0_kiq_init_register(ring);
3652 soc15_grbm_select(adev, 0, 0, 0, 0);
3653 mutex_unlock(&adev->srbm_mutex);
3655 if (adev->gfx.mec.mqd_backup[mqd_idx])
3656 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3662 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3664 struct amdgpu_device *adev = ring->adev;
3665 struct v9_mqd *mqd = ring->mqd_ptr;
3666 int mqd_idx = ring - &adev->gfx.compute_ring[0];
3668 if (!adev->in_gpu_reset && !adev->in_suspend) {
3669 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3670 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3671 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3672 mutex_lock(&adev->srbm_mutex);
3673 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3674 gfx_v9_0_mqd_init(ring);
3675 soc15_grbm_select(adev, 0, 0, 0, 0);
3676 mutex_unlock(&adev->srbm_mutex);
3678 if (adev->gfx.mec.mqd_backup[mqd_idx])
3679 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3680 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3681 /* reset MQD to a clean status */
3682 if (adev->gfx.mec.mqd_backup[mqd_idx])
3683 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}
3695 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3697 struct amdgpu_ring *ring;
3700 ring = &adev->gfx.kiq.ring;
3702 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3703 if (unlikely(r != 0))
3706 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3707 if (unlikely(r != 0))
3710 gfx_v9_0_kiq_init_queue(ring);
3711 amdgpu_bo_kunmap(ring->mqd_obj);
3712 ring->mqd_ptr = NULL;
3713 amdgpu_bo_unreserve(ring->mqd_obj);
3714 ring->sched.ready = true;
3718 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3720 struct amdgpu_ring *ring = NULL;
3723 gfx_v9_0_cp_compute_enable(adev, true);
3725 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3726 ring = &adev->gfx.compute_ring[i];
3728 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3729 if (unlikely(r != 0))
3731 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3733 r = gfx_v9_0_kcq_init_queue(ring);
3734 amdgpu_bo_kunmap(ring->mqd_obj);
3735 ring->mqd_ptr = NULL;
3737 amdgpu_bo_unreserve(ring->mqd_obj);
3742 r = gfx_v9_0_kiq_kcq_enable(adev);
3747 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3750 struct amdgpu_ring *ring;
3752 if (!(adev->flags & AMD_IS_APU))
3753 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3755 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3756 if (adev->asic_type != CHIP_ARCTURUS) {
3757 /* legacy firmware loading */
3758 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3763 r = gfx_v9_0_cp_compute_load_microcode(adev);
3768 r = gfx_v9_0_kiq_resume(adev);
3772 if (adev->asic_type != CHIP_ARCTURUS) {
3773 r = gfx_v9_0_cp_gfx_resume(adev);
3778 r = gfx_v9_0_kcq_resume(adev);
3782 if (adev->asic_type != CHIP_ARCTURUS) {
3783 ring = &adev->gfx.gfx_ring[0];
3784 r = amdgpu_ring_test_helper(ring);
3789 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3790 ring = &adev->gfx.compute_ring[i];
3791 amdgpu_ring_test_helper(ring);
3794 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3799 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3801 if (adev->asic_type != CHIP_ARCTURUS)
3802 gfx_v9_0_cp_gfx_enable(adev, enable);
3803 gfx_v9_0_cp_compute_enable(adev, enable);
3806 static int gfx_v9_0_hw_init(void *handle)
3809 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3811 if (!amdgpu_sriov_vf(adev))
3812 gfx_v9_0_init_golden_registers(adev);
3814 gfx_v9_0_constants_init(adev);
3816 r = gfx_v9_0_csb_vram_pin(adev);
3820 r = adev->gfx.rlc.funcs->resume(adev);
3824 r = gfx_v9_0_cp_resume(adev);
3828 if (adev->asic_type != CHIP_ARCTURUS) {
3829 r = gfx_v9_0_ngg_en(adev);
3837 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3840 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3842 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3844 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3846 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3847 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3849 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3850 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3851 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3852 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3853 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3854 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3855 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3856 amdgpu_ring_write(kiq_ring, 0);
3857 amdgpu_ring_write(kiq_ring, 0);
3858 amdgpu_ring_write(kiq_ring, 0);
3860 r = amdgpu_ring_test_helper(kiq_ring);
3862 DRM_ERROR("KCQ disable failed\n");
3867 static int gfx_v9_0_hw_fini(void *handle)
3869 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3871 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3872 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3873 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3875 /* disable KCQ to avoid CPC touch memory not valid anymore */
3876 gfx_v9_0_kcq_disable(adev);
3878 if (amdgpu_sriov_vf(adev)) {
3879 gfx_v9_0_cp_gfx_enable(adev, false);
		/* must disable polling for SRIOV when hw finished, otherwise
		 * the CPC engine may keep fetching a WB address that is
		 * already invalid after sw finished, and trigger a DMAR
		 * read error on the hypervisor side.
		 */
3885 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
	/* Use the deinitialize sequence from CAIL when unbinding the device
	 * from the driver, otherwise KIQ hangs when binding back.
	 */
3892 if (!adev->in_gpu_reset && !adev->in_suspend) {
3893 mutex_lock(&adev->srbm_mutex);
3894 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3895 adev->gfx.kiq.ring.pipe,
3896 adev->gfx.kiq.ring.queue, 0);
3897 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3898 soc15_grbm_select(adev, 0, 0, 0, 0);
3899 mutex_unlock(&adev->srbm_mutex);
3902 gfx_v9_0_cp_enable(adev, false);
3903 adev->gfx.rlc.funcs->stop(adev);
3905 gfx_v9_0_csb_vram_unpin(adev);
3910 static int gfx_v9_0_suspend(void *handle)
3912 return gfx_v9_0_hw_fini(handle);
3915 static int gfx_v9_0_resume(void *handle)
3917 return gfx_v9_0_hw_init(handle);
3920 static bool gfx_v9_0_is_idle(void *handle)
3922 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
				GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
3931 static int gfx_v9_0_wait_for_idle(void *handle)
3934 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3936 for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v9_0_is_idle(handle))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
3944 static int gfx_v9_0_soft_reset(void *handle)
3946 u32 grbm_soft_reset = 0;
3948 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3951 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3952 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3953 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3954 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3955 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3956 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3957 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3958 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3959 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3960 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3961 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3964 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3965 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3966 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3970 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3971 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3972 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3973 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3976 if (grbm_soft_reset) {
3978 adev->gfx.rlc.funcs->stop(adev);
3980 if (adev->asic_type != CHIP_ARCTURUS)
3981 /* Disable GFX parsing/prefetching */
3982 gfx_v9_0_cp_gfx_enable(adev, false);
3984 /* Disable MEC parsing/prefetching */
3985 gfx_v9_0_cp_compute_enable(adev, false);
3987 if (grbm_soft_reset) {
3988 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3989 tmp |= grbm_soft_reset;
3990 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3991 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3992 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3996 tmp &= ~grbm_soft_reset;
3997 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3998 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
	/* Wait a little for things to settle down */
	udelay(50);
4007 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4011 mutex_lock(&adev->gfx.gpu_clock_mutex);
4012 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4013 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4014 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4015 mutex_unlock(&adev->gfx.gpu_clock_mutex);
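	/*
	 * Note: the write to mmRLC_CAPTURE_GPU_CLOCK_COUNT latches the
	 * free-running counter so that the two reads above form one
	 * coherent 64-bit value; gpu_clock_mutex serializes captures.
	 */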
4019 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4021 uint32_t gds_base, uint32_t gds_size,
4022 uint32_t gws_base, uint32_t gws_size,
4023 uint32_t oa_base, uint32_t oa_size)
4025 struct amdgpu_device *adev = ring->adev;
4028 gfx_v9_0_write_data_to_reg(ring, 0, false,
4029 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4033 gfx_v9_0_write_data_to_reg(ring, 0, false,
4034 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4038 gfx_v9_0_write_data_to_reg(ring, 0, false,
4039 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4040 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4043 gfx_v9_0_write_data_to_reg(ring, 0, false,
4044 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4045 (1 << (oa_size + oa_base)) - (1 << oa_base));
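	/*
	 * The OA mask above builds a contiguous field of oa_size bits
	 * starting at bit oa_base, e.g. oa_base = 4, oa_size = 4 gives
	 * (1 << 8) - (1 << 4) = 0xf0.
	 */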
4048 static const u32 vgpr_init_compute_shader[] =
4050 0xb07c0000, 0xbe8000ff,
4051 0x000000f8, 0xbf110800,
4052 0x7e000280, 0x7e020280,
4053 0x7e040280, 0x7e060280,
4054 0x7e080280, 0x7e0a0280,
4055 0x7e0c0280, 0x7e0e0280,
4056 0x80808800, 0xbe803200,
4057 0xbf84fff5, 0xbf9c0000,
4058 0xd28c0001, 0x0001007f,
4059 0xd28d0001, 0x0002027e,
4060 0x10020288, 0xb8810904,
4061 0xb7814000, 0xd1196a01,
4062 0x00000301, 0xbe800087,
4063 0xbefc00c1, 0xd89c4000,
4064 0x00020201, 0xd89cc080,
4065 0x00040401, 0x320202ff,
4066 0x00000800, 0x80808100,
4067 0xbf84fff8, 0x7e020280,
4068 0xbf810000, 0x00000000,
4071 static const u32 sgpr_init_compute_shader[] =
4073 0xb07c0000, 0xbe8000ff,
4074 0x0000005f, 0xbee50080,
4075 0xbe812c65, 0xbe822c65,
4076 0xbe832c65, 0xbe842c65,
4077 0xbe852c65, 0xb77c0005,
4078 0x80808500, 0xbf84fff8,
4079 0xbe800080, 0xbf810000,
4082 static const struct soc15_reg_entry vgpr_init_regs[] = {
4083 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4084 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4085 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4086 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4087 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4088 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4089 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4090 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
4092 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4095 static const struct soc15_reg_entry sgpr_init_regs[] = {
4096 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4097 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4098 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4099 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4100 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4101 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4102 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4103 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 SGPRs) */
4105 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4108 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4109 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4110 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4111 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4112 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4113 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4114 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4115 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4116 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4117 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4118 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4119 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4120 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4121 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4122 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4123 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4124 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4125 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4126 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4127 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4128 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4129 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4130 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4131 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4132 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4133 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4134 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4135 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4136 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4137 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4138 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4139 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4140 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
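/*
 * Note: the two trailing fields of each entry are the per-SE and
 * per-instance counts; the read-back loop in
 * gfx_v9_0_do_edc_gpr_workarounds() walks both dimensions with
 * gfx_v9_0_select_se_sh() so every physical counter instance is read
 * and thereby cleared.
 */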
4143 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4145 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4148 r = amdgpu_ring_alloc(ring, 7);
4150 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4155 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4156 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4158 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4159 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4160 PACKET3_DMA_DATA_DST_SEL(1) |
4161 PACKET3_DMA_DATA_SRC_SEL(2) |
4162 PACKET3_DMA_DATA_ENGINE(0)));
4163 amdgpu_ring_write(ring, 0);
4164 amdgpu_ring_write(ring, 0);
4165 amdgpu_ring_write(ring, 0);
4166 amdgpu_ring_write(ring, 0);
4167 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4168 adev->gds.gds_size);
4170 amdgpu_ring_commit(ring);
4172 for (i = 0; i < adev->usec_timeout; i++) {
4173 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4178 if (i >= adev->usec_timeout)
4181 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4186 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4188 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4189 struct amdgpu_ib ib;
4190 struct dma_fence *f = NULL;
4192 unsigned total_size, vgpr_offset, sgpr_offset;
4195 /* only support when RAS is enabled */
4196 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4199 /* bail if the compute ring is not ready */
4200 if (!ring->sched.ready)
	total_size =
		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4207 total_size = ALIGN(total_size, 256);
4208 vgpr_offset = total_size;
4209 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4210 sgpr_offset = total_size;
4211 total_size += sizeof(sgpr_init_compute_shader);
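	/*
	 * Budget sketch for total_size: per dispatch, each single-register
	 * SET_SH_REG costs 3 dwords (header + offset + value), the
	 * COMPUTE_PGM_LO/_HI write costs 4 (header + offset + 2 values),
	 * DISPATCH_DIRECT costs 5 (header + x/y/z + initiator) and the CS
	 * partial flush EVENT_WRITE costs 2; the final "* 4" converts
	 * dwords to bytes.
	 */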
4213 /* allocate an indirect buffer to put the commands in */
4214 memset(&ib, 0, sizeof(ib));
4215 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4217 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4221 /* load the compute shaders */
4222 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4223 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4225 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4226 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
	/* init the IB length to 0 */
	ib.length_dw = 0;
4232 /* write the register state for the compute dispatch */
4233 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4234 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4235 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4236 - PACKET3_SET_SH_REG_START;
4237 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4239 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4240 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4241 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4242 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4243 - PACKET3_SET_SH_REG_START;
4244 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4245 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4247 /* write dispatch packet */
4248 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4249 ib.ptr[ib.length_dw++] = 128; /* x */
4250 ib.ptr[ib.length_dw++] = 1; /* y */
4251 ib.ptr[ib.length_dw++] = 1; /* z */
4252 ib.ptr[ib.length_dw++] =
4253 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4255 /* write CS partial flush packet */
4256 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4257 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4260 /* write the register state for the compute dispatch */
4261 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4262 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4263 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4264 - PACKET3_SET_SH_REG_START;
4265 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4267 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4268 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4269 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4270 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4271 - PACKET3_SET_SH_REG_START;
4272 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4273 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4275 /* write dispatch packet */
4276 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4277 ib.ptr[ib.length_dw++] = 128; /* x */
4278 ib.ptr[ib.length_dw++] = 1; /* y */
4279 ib.ptr[ib.length_dw++] = 1; /* z */
4280 ib.ptr[ib.length_dw++] =
4281 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4283 /* write CS partial flush packet */
4284 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4285 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
	/* schedule the IB on the ring */
4288 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4290 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4294 /* wait for the GPU to finish processing the IB */
4295 r = dma_fence_wait(f, false);
4297 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4301 /* read back registers to clear the counters */
4302 mutex_lock(&adev->grbm_idx_mutex);
4303 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4304 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4305 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4306 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4307 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4311 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4312 mutex_unlock(&adev->grbm_idx_mutex);
4315 amdgpu_ib_free(adev, &ib, NULL);
4321 static int gfx_v9_0_early_init(void *handle)
4323 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4325 if (adev->asic_type == CHIP_ARCTURUS)
		adev->gfx.num_gfx_rings = 0;
	else
		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4329 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4330 gfx_v9_0_set_ring_funcs(adev);
4331 gfx_v9_0_set_irq_funcs(adev);
4332 gfx_v9_0_set_gds_init(adev);
4333 gfx_v9_0_set_rlc_funcs(adev);
4338 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
4339 struct ras_err_data *err_data,
4340 struct amdgpu_iv_entry *entry);
4342 static int gfx_v9_0_ecc_late_init(void *handle)
4344 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4345 struct ras_common_if **ras_if = &adev->gfx.ras_if;
4346 struct ras_ih_if ih_info = {
4347 .cb = gfx_v9_0_process_ras_data_cb,
4349 struct ras_fs_if fs_info = {
4350 .sysfs_name = "gfx_err_count",
4351 .debugfs_name = "gfx_err_inject",
4353 struct ras_common_if ras_block = {
4354 .block = AMDGPU_RAS_BLOCK__GFX,
4355 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
4356 .sub_block_index = 0,
4361 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
4362 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
4366 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4370 /* requires IBs so do in late init after IB pool is initialized */
4371 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4375 /* handle resume path. */
	/* re-send the RAS TA enable cmd during resume;
	 * be prepared to handle failure.
	 */
4380 ih_info.head = **ras_if;
4381 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4384 /* request a gpu reset. will run again. */
4385 amdgpu_ras_request_reset_on_boot(adev,
4386 AMDGPU_RAS_BLOCK__GFX);
	/* failed to enable RAS; clean everything up. */
	/* enabled successfully; continue. */
4396 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
4400 **ras_if = ras_block;
4402 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4405 amdgpu_ras_request_reset_on_boot(adev,
4406 AMDGPU_RAS_BLOCK__GFX);
4412 ih_info.head = **ras_if;
4413 fs_info.head = **ras_if;
4415 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
4419 amdgpu_ras_debugfs_create(adev, &fs_info);
4421 r = amdgpu_ras_sysfs_create(adev, &fs_info);
4425 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
4431 amdgpu_ras_sysfs_remove(adev, *ras_if);
4433 amdgpu_ras_debugfs_remove(adev, *ras_if);
4434 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
4436 amdgpu_ras_feature_enable(adev, *ras_if, 0);
4443 static int gfx_v9_0_late_init(void *handle)
4445 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4448 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4452 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4456 r = gfx_v9_0_ecc_late_init(handle);
4463 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4465 uint32_t rlc_setting;
4467 /* if RLC is not enabled, do nothing */
4468 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return false;

	return true;
4475 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4480 data = RLC_SAFE_MODE__CMD_MASK;
4481 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4482 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4484 /* wait for RLC_SAFE_MODE */
4485 for (i = 0; i < adev->usec_timeout; i++) {
4486 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4492 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4496 data = RLC_SAFE_MODE__CMD_MASK;
4497 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4500 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4503 amdgpu_gfx_rlc_enter_safe_mode(adev);
4505 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4506 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4507 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4508 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
	} else {
		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
	}
4514 amdgpu_gfx_rlc_exit_safe_mode(adev);
4517 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4520 /* TODO: double check if we need to perform under safe mode */
4521 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4523 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4524 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
	else
		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4528 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4529 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
	else
		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4533 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4536 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4541 amdgpu_gfx_rlc_enter_safe_mode(adev);
4543 /* It is disabled by HW by default */
4544 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4545 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4546 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4548 if (adev->asic_type != CHIP_VEGA12)
4549 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4551 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4552 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4553 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4555 /* only for Vega10 & Raven1 */
4556 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4559 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4561 /* MGLS is a global flag to control all MGLS in GFX */
4562 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4563 /* 2 - RLC memory Light sleep */
4564 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4565 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4566 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4568 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4570 /* 3 - CP memory Light sleep */
4571 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4572 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4573 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4575 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4579 /* 1 - MGCG_OVERRIDE */
4580 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4582 if (adev->asic_type != CHIP_VEGA12)
4583 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4585 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4586 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4587 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4588 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4591 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4593 /* 2 - disable MGLS in RLC */
4594 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4595 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4596 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4597 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4600 /* 3 - disable MGLS in CP */
4601 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4602 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4603 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4604 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4608 amdgpu_gfx_rlc_exit_safe_mode(adev);
4611 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4616 amdgpu_gfx_rlc_enter_safe_mode(adev);
4618 /* Enable 3D CGCG/CGLS */
4619 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4620 /* write cmd to clear cgcg/cgls ov */
4621 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4622 /* unset CGCG override */
4623 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4624 /* update CGCG and CGLS override bits */
4626 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
	/* enable the 3D CGCG FSM (0x0000363f) */
4629 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4631 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4632 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4633 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4634 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4635 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4637 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4639 /* set IDLE_POLL_COUNT(0x00900100) */
4640 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4641 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4642 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4644 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4646 /* Disable CGCG/CGLS */
4647 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4648 /* disable cgcg, cgls should be disabled */
4649 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4650 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4651 /* disable cgcg and cgls in FSM */
4653 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4656 amdgpu_gfx_rlc_exit_safe_mode(adev);
4659 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4664 amdgpu_gfx_rlc_enter_safe_mode(adev);
4666 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4667 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4668 /* unset CGCG override */
4669 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4670 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4671 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
		else
			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4674 /* update CGCG and CGLS override bits */
4676 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
	/* enable the CGCG FSM (0x0000363F) */
4679 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4681 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4682 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4683 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4684 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4685 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4687 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4689 /* set IDLE_POLL_COUNT(0x00900100) */
4690 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4691 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4692 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4694 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4696 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4697 /* reset CGCG/CGLS bits */
4698 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4699 /* disable cgcg and cgls in FSM */
4701 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4704 amdgpu_gfx_rlc_exit_safe_mode(adev);
4707 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4711 /* CGCG/CGLS should be enabled after MGCG/MGLS
4712 * === MGCG + MGLS ===
4714 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4715 /* === CGCG /CGLS for GFX 3D Only === */
4716 gfx_v9_0_update_3d_clock_gating(adev, enable);
4717 /* === CGCG + CGLS === */
4718 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4720 /* CGCG/CGLS should be disabled before MGCG/MGLS
4721 * === CGCG + CGLS ===
4723 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4724 /* === CGCG /CGLS for GFX 3D Only === */
4725 gfx_v9_0_update_3d_clock_gating(adev, enable);
4726 /* === MGCG + MGLS === */
4727 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4732 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4733 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4734 .set_safe_mode = gfx_v9_0_set_safe_mode,
4735 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4736 .init = gfx_v9_0_rlc_init,
4737 .get_csb_size = gfx_v9_0_get_csb_size,
4738 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4739 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4740 .resume = gfx_v9_0_rlc_resume,
4741 .stop = gfx_v9_0_rlc_stop,
4742 .reset = gfx_v9_0_rlc_reset,
4743 .start = gfx_v9_0_rlc_start
4746 static int gfx_v9_0_set_powergating_state(void *handle,
4747 enum amd_powergating_state state)
4749 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);
4752 switch (adev->asic_type) {
4755 amdgpu_gfx_off_ctrl(adev, false);
4756 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4758 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4759 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4760 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
		}
4766 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4767 gfx_v9_0_enable_cp_power_gating(adev, true);
		else
			gfx_v9_0_enable_cp_power_gating(adev, false);
4771 /* update gfx cgpg state */
4772 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4774 /* update mgcg state */
4775 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4778 amdgpu_gfx_off_ctrl(adev, true);
4782 amdgpu_gfx_off_ctrl(adev, false);
4783 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4785 amdgpu_gfx_off_ctrl(adev, true);
4795 static int gfx_v9_0_set_clockgating_state(void *handle,
4796 enum amd_clockgating_state state)
4798 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4800 if (amdgpu_sriov_vf(adev))
4803 switch (adev->asic_type) {
4808 gfx_v9_0_update_gfx_clock_gating(adev,
		state == AMD_CG_STATE_GATE);
4817 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4819 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4822 if (amdgpu_sriov_vf(adev))
4825 /* AMD_CG_SUPPORT_GFX_MGCG */
4826 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4827 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4828 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4830 /* AMD_CG_SUPPORT_GFX_CGCG */
4831 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4832 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4833 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4835 /* AMD_CG_SUPPORT_GFX_CGLS */
4836 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4837 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4839 /* AMD_CG_SUPPORT_GFX_RLC_LS */
4840 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4841 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4842 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4844 /* AMD_CG_SUPPORT_GFX_CP_LS */
4845 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4846 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4847 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4849 if (adev->asic_type != CHIP_ARCTURUS) {
4850 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4851 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4852 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4853 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4855 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4856 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4857 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4861 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 uses a 32-bit rptr */
4866 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4868 struct amdgpu_device *adev = ring->adev;
4871 /* XXX check if swapping is necessary on BE */
4872 if (ring->use_doorbell) {
4873 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
	} else {
		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
	}
4882 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4884 struct amdgpu_device *adev = ring->adev;
4886 if (ring->use_doorbell) {
4887 /* XXX check if swapping is necessary on BE */
4888 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4889 WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
	}
4896 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4898 struct amdgpu_device *adev = ring->adev;
4899 u32 ref_and_mask, reg_mem_engine;
4900 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4902 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4905 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4908 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4915 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4916 reg_mem_engine = 1; /* pfp */
4919 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4920 adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4921 adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4922 ref_and_mask, ref_and_mask, 0x20);
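/*
 * Note: each CP pipe owns one request/ack bit in the NBIO HDP flush
 * registers; ref_and_mask selects this ring's bit, and the wait above
 * polls the "done" register until the flush it requested is
 * acknowledged.
 */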
4925 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4926 struct amdgpu_job *job,
4927 struct amdgpu_ib *ib,
4930 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4931 u32 header, control = 0;
4933 if (ib->flags & AMDGPU_IB_FLAG_CE)
4934 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4938 control |= ib->length_dw | (vmid << 24);
4940 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4941 control |= INDIRECT_BUFFER_PRE_ENB(1);
4943 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4944 gfx_v9_0_ring_emit_de_meta(ring);
4947 amdgpu_ring_write(ring, header);
4948 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4949 amdgpu_ring_write(ring,
4953 lower_32_bits(ib->gpu_addr));
4954 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4955 amdgpu_ring_write(ring, control);
4958 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4959 struct amdgpu_job *job,
4960 struct amdgpu_ib *ib,
4963 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4964 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4966 /* Currently, there is a high possibility to get wave ID mismatch
4967 * between ME and GDS, leading to a hw deadlock, because ME generates
4968 * different wave IDs than the GDS expects. This situation happens
4969 * randomly when at least 5 compute pipes use GDS ordered append.
4970 * The wave IDs generated by ME are also wrong after suspend/resume.
4971 * Those are probably bugs somewhere else in the kernel driver.
4973 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4974 * GDS to 0 for this ring (me/pipe).
4976 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4977 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4978 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4979 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4982 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4983 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4984 amdgpu_ring_write(ring,
4988 lower_32_bits(ib->gpu_addr));
4989 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4990 amdgpu_ring_write(ring, control);
4993 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4994 u64 seq, unsigned flags)
4996 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4997 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4998 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5000 /* RELEASE_MEM - flush caches, send int */
5001 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5002 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5003 EOP_TC_NC_ACTION_EN) :
5004 (EOP_TCL1_ACTION_EN |
5006 EOP_TC_WB_ACTION_EN |
5007 EOP_TC_MD_ACTION_EN)) |
5008 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5010 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	/*
	 * The address must be Qword-aligned for a 64-bit write, or
	 * Dword-aligned when only the low 32 bits of data are written
	 * (data high is discarded).
	 */
5020 amdgpu_ring_write(ring, lower_32_bits(addr));
5021 amdgpu_ring_write(ring, upper_32_bits(addr));
5022 amdgpu_ring_write(ring, lower_32_bits(seq));
5023 amdgpu_ring_write(ring, upper_32_bits(seq));
5024 amdgpu_ring_write(ring, 0);
5027 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5029 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5030 uint32_t seq = ring->fence_drv.sync_seq;
5031 uint64_t addr = ring->fence_drv.gpu_addr;
5033 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5034 lower_32_bits(addr), upper_32_bits(addr),
5035 seq, 0xffffffff, 4);
5038 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5039 unsigned vmid, uint64_t pd_addr)
5041 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5043 /* compute doesn't have PFP */
5044 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5045 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5046 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5047 amdgpu_ring_write(ring, 0x0);
5051 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5053 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5056 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5060 /* XXX check if swapping is necessary on BE */
5061 if (ring->use_doorbell)
5062 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5068 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5071 struct amdgpu_device *adev = ring->adev;
5072 int pipe_num, tmp, reg;
5073 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5075 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5077 /* first me only has 2 entries, GFX and HP3D */
5081 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5083 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5087 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5088 struct amdgpu_ring *ring,
5093 struct amdgpu_ring *iring;
5095 mutex_lock(&adev->gfx.pipe_reserve_mutex);
5096 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5098 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5100 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5102 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5103 /* Clear all reservations - everyone reacquires all resources */
5104 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5105 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5108 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5109 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5112 /* Lower all pipes without a current reservation */
5113 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5114 iring = &adev->gfx.gfx_ring[i];
5115 pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5119 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5120 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5123 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5124 iring = &adev->gfx.compute_ring[i];
5125 pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5129 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5130 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5134 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5137 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5138 struct amdgpu_ring *ring,
5141 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5142 uint32_t queue_priority = acquire ? 0xf : 0x0;
5144 mutex_lock(&adev->srbm_mutex);
5145 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5147 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5148 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5150 soc15_grbm_select(adev, 0, 0, 0, 0);
5151 mutex_unlock(&adev->srbm_mutex);
5154 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5155 enum drm_sched_priority priority)
5157 struct amdgpu_device *adev = ring->adev;
5158 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5160 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5163 gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5164 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5167 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5169 struct amdgpu_device *adev = ring->adev;
5171 /* XXX check if swapping is necessary on BE */
5172 if (ring->use_doorbell) {
5173 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5174 WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		BUG(); /* only the DOORBELL method is supported on gfx9 now */
	}
5180 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5181 u64 seq, unsigned int flags)
5183 struct amdgpu_device *adev = ring->adev;
5185 /* we only allocate 32bit for each seq wb address */
5186 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5188 /* write fence seq to the "addr" */
5189 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5190 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5191 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5192 amdgpu_ring_write(ring, lower_32_bits(addr));
5193 amdgpu_ring_write(ring, upper_32_bits(addr));
5194 amdgpu_ring_write(ring, lower_32_bits(seq));
5196 if (flags & AMDGPU_FENCE_FLAG_INT) {
5197 /* set register to trigger INT */
5198 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5199 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5200 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5201 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5202 amdgpu_ring_write(ring, 0);
5203 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5207 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5209 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5210 amdgpu_ring_write(ring, 0);
5213 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5215 struct v9_ce_ib_state ce_payload = {0};
5219 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5220 csa_addr = amdgpu_csa_vaddr(ring->adev);
5222 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5223 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5224 WRITE_DATA_DST_SEL(8) |
5226 WRITE_DATA_CACHE_POLICY(0));
5227 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5228 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5229 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5232 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5234 struct v9_de_ib_state de_payload = {0};
5235 uint64_t csa_addr, gds_addr;
5238 csa_addr = amdgpu_csa_vaddr(ring->adev);
5239 gds_addr = csa_addr + 4096;
5240 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5241 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5243 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5244 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5245 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5246 WRITE_DATA_DST_SEL(8) |
5248 WRITE_DATA_CACHE_POLICY(0));
5249 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5250 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5251 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5254 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5256 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5257 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v9_0_ring_emit_ce_meta(ring);

	gfx_v9_0_ring_emit_tmz(ring, true);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble is
		 * presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

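/*
 * Worked example (hypothetical flags): with AMDGPU_HAVE_CTX_SWITCH and
 * AMDGPU_PREAMBLE_IB_PRESENT both set, dw2 accumulates to
 *   0x80000000 | 0x8001 | 0x01000000 | 0x10002 | 0x10000000 == 0x91018003,
 * which is what lands in the second dword of the CONTEXT_CONTROL packet.
 */
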
static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

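/*
 * Usage sketch (hypothetical caller, mirroring how the init/patch pair is
 * wired up through the ring funcs):
 *
 *   unsigned offs = gfx_v9_0_ring_emit_init_cond_exec(ring);
 *   ...emit the conditionally executed packets...
 *   gfx_v9_0_ring_emit_patch_cond_exec(ring, offs);
 *
 * The 0x55aa55aa dummy is the dword count that patch_cond_exec below later
 * overwrites with the real skip length.
 */
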
static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
}

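/*
 * Wrap-around example (illustrative numbers): with a 1024-dword ring
 * (ring_size >> 2 == 1024), offset == 1020 and a wptr that wrapped to 6,
 * cur == 5 is not greater than offset, so the patched skip count is
 * 1024 - 1020 + 5 == 9 dwords.
 */
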
static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	uint32_t cmd = 0;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = (1 << 16); /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					uint32_t val, uint32_t mask)
{
	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}

static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
						  uint32_t reg0, uint32_t reg1,
						  uint32_t ref, uint32_t mask)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	struct amdgpu_device *adev = ring->adev;
	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
			     adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;

	if (fw_version_ok)
		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
				      ref, mask, 0x20);
	else
		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
							   ref, mask);
}

static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	WREG32(mmSQ_CMD, value);
}

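/*
 * Field intent, as a hedged reading of the SQ_CMD encoding: CMD 0x03
 * requests a wave kill, MODE 0x01 broadcasts the command, and
 * CHECK_VMID/VM_ID restrict the kill to waves owned by the hung ring's
 * VMID, so unrelated contexts keep running.
 */
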
static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       TIME_STAMP_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */
	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
			break;
		case 1:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
			break;
		case 2:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
			break;
		case 3:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 0);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 1);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       PRIV_REG_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       PRIV_INSTR_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}

	return 0;
}

#define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
			CP_ECC_ERROR_INT_ENABLE, 1)

#define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
			CP_ECC_ERROR_INT_ENABLE, 0)

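/*
 * Expansion sketch: the token pasting means ENABLE_ECC_ON_ME_PIPE(1, 2)
 * becomes
 *   WREG32_FIELD15(GC, 0, CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 1);
 * one such invocation is emitted per MEC1 pipe below.
 */
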
static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
					   struct amdgpu_irq_src *source,
					   unsigned type,
					   enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       CP_ECC_ERROR_INT_ENABLE, 0);
		DISABLE_ECC_ON_ME_PIPE(1, 0);
		DISABLE_ECC_ON_ME_PIPE(1, 1);
		DISABLE_ECC_ON_ME_PIPE(1, 2);
		DISABLE_ECC_ON_ME_PIPE(1, 3);
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       CP_ECC_ERROR_INT_ENABLE, 1);
		ENABLE_ECC_ON_ME_PIPE(1, 0);
		ENABLE_ECC_ON_ME_PIPE(1, 1);
		ENABLE_ECC_ON_ME_PIPE(1, 2);
		ENABLE_ECC_ON_ME_PIPE(1, 3);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting
			 * from VI. The interrupt can only be enabled/disabled
			 * per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) &&
			    (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

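/*
 * Decoding example (hypothetical IV entry): ring_id == 0x25 yields
 * me_id == (0x25 & 0x0c) >> 2 == 1, pipe_id == 0x25 & 0x03 == 1 and
 * queue_id == (0x25 & 0x70) >> 4 == 2, so the fence of the
 * MEC1/pipe1/queue2 compute ring is the one processed.
 */
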
static void gfx_v9_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}

static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}

static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}

static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
					struct ras_err_data *err_data,
					struct amdgpu_iv_entry *entry)
{
	/* TODO ue will trigger an interrupt. */
	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
	if (adev->gfx.funcs->query_ras_error_count)
		adev->gfx.funcs->query_ras_error_count(adev, err_data);
	amdgpu_ras_reset_gpu(adev, 0);
	return AMDGPU_RAS_SUCCESS;
}

static const struct {
	const char *name;
	uint32_t ip;
	uint32_t inst;
	uint32_t seg;
	uint32_t reg_offset;
	uint32_t per_se_instance;
	int32_t num_instance;
	uint32_t sec_count_mask;
	uint32_t ded_count_mask;
} gfx_ras_edc_regs[] = {
5676 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
5677 REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5678 REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
5679 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
5680 REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
5681 REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
5682 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5683 REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
5684 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5685 REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
5686 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
5687 REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
5688 REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
5689 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5690 REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
5691 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5692 REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5693 REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
5694 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
5695 REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
5696 REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
5697 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
5698 REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
5699 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
5700 REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
5701 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
5702 REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
5703 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5704 REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
5705 REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
5706 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5707 REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
5708 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5709 0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5710 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
5711 { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5712 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5713 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5714 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
5715 { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5716 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5717 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
5718 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5719 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5720 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5721 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
5722 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5723 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5724 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5725 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
5726 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5727 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5728 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5729 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
5730 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5731 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5732 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5733 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
5734 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
5735 REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
5736 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5737 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5738 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
5739 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5740 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
5741 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5742 REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
5743 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5744 REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
5745 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5746 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
5747 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5748 REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
5749 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5750 REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
5751 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5752 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5753 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
5754 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5755 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5756 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
5757 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5758 REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5759 REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
5760 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5761 REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5762 REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
5763 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5764 REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5765 REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
5766 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5767 REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
5768 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5769 REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
5770 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5771 REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
5772 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5773 REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
5774 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5775 REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
5776 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5777 REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
5778 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5779 REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
5780 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5781 REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
5782 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5783 16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
5784 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5785 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5787 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5788 16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
5789 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5790 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5792 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5793 16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
5794 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
5795 REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
5796 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5797 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5798 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
5799 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5800 REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5801 REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
5802 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5803 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
5804 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5805 REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
5806 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5807 REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
5808 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5809 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5810 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
5811 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5812 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5813 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
5814 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5815 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5816 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
5817 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5818 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5819 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
5820 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5821 REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
5822 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5823 REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5824 REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
5825 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5826 REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5827 REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
5828 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5829 REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
5830 REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
5831 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5832 REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5833 REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
5834 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5835 REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5836 REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
5837 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5838 REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5839 REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
5840 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5841 REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5842 REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
5843 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5844 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5845 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
5846 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5847 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5848 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
5849 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5850 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5851 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
5852 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5853 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5854 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
5855 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5856 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5857 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
5858 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5859 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5860 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
5861 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5862 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5863 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
5864 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5865 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5866 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
5867 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5868 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5869 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
5870 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5871 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5872 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
5873 { "SQC_INST_BANKA_UTCL1_MISS_FIFO",
5874 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5875 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5877 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5878 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
5879 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5880 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
5881 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5882 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
5883 { "SQC_DATA_BANKA_DIRTY_BIT_RAM",
5884 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5885 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
5886 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5887 REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5888 REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
5889 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5890 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5891 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
5892 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5893 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5894 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
5895 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5896 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5897 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
5898 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5899 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5900 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
5901 { "SQC_INST_BANKB_UTCL1_MISS_FIFO",
5902 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5903 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5905 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5906 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
5907 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5908 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
5909 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5910 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
5911 { "SQC_DATA_BANKB_DIRTY_BIT_RAM",
5912 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5913 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
5914 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5915 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5916 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
5917 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5918 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5919 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
5920 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5921 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5922 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
5923 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5924 REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5925 REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
5926 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5927 REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5928 REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
5929 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5930 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
5931 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5932 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
5933 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5934 REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
5935 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5936 REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
5937 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5938 REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
5939 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5940 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
5941 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
5942 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5943 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
5944 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
5945 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5946 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
5947 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
5948 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5949 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
5950 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5951 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
5952 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5953 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
5954 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5955 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
5956 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5957 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
5958 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5959 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
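/*
 * Reading the table: SEC masks count corrected single-bit errors (reported
 * as CE by the query function below), DED masks count detected double-bit
 * errors (reported as UE), and SED-style detect-only counters have no DED
 * field, hence the trailing 0.
 */
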
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if)
{
	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
	int ret;
	struct ta_ras_trigger_error_input block_info = { 0 };

	if (adev->asic_type != CHIP_VEGA20)
		return -EINVAL;

	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
		return -EPERM;

	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
	      info->head.type)) {
		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
			  ras_gfx_subblocks[info->head.sub_block_index].name,
			  info->head.type);
		return -EPERM;
	}

	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
	      info->head.type)) {
		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
			  ras_gfx_subblocks[info->head.sub_block_index].name,
			  info->head.type);
		return -EPERM;
	}

	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
	block_info.sub_block_index =
		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
	block_info.address = info->address;
	block_info.value = info->value;

	mutex_lock(&adev->grbm_idx_mutex);
	ret = psp_ras_trigger_error(&adev->psp, &block_info);
	mutex_unlock(&adev->grbm_idx_mutex);

	return ret;
}

static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
	uint32_t sec_count, ded_count;
	uint32_t i;
	uint32_t reg_value;
	uint32_t se_id, instance_id;

	if (adev->asic_type != CHIP_VEGA20)
		return -EINVAL;

	err_data->ue_count = 0;
	err_data->ce_count = 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
		for (instance_id = 0; instance_id < 256; instance_id++) {
			for (i = 0;
			     i < sizeof(gfx_ras_edc_regs) / sizeof(gfx_ras_edc_regs[0]);
			     i++) {
				if (se_id != 0 &&
				    !gfx_ras_edc_regs[i].per_se_instance)
					continue;
				if (instance_id >= gfx_ras_edc_regs[i].num_instance)
					continue;

				gfx_v9_0_select_se_sh(adev, se_id, 0,
						      instance_id);

				reg_value = RREG32(
					adev->reg_offset[gfx_ras_edc_regs[i].ip]
						[gfx_ras_edc_regs[i].inst]
						[gfx_ras_edc_regs[i].seg] +
					gfx_ras_edc_regs[i].reg_offset);
				sec_count = reg_value &
					    gfx_ras_edc_regs[i].sec_count_mask;
				ded_count = reg_value &
					    gfx_ras_edc_regs[i].ded_count_mask;
				if (sec_count) {
					DRM_INFO(
						"Instance[%d][%d]: SubBlock %s, SEC %d\n",
						se_id, instance_id,
						gfx_ras_edc_regs[i].name,
						sec_count);
					err_data->ce_count++;
				}

				if (ded_count) {
					DRM_INFO(
						"Instance[%d][%d]: SubBlock %s, DED %d\n",
						se_id, instance_id,
						gfx_ras_edc_regs[i].name,
						ded_count);
					err_data->ue_count++;
				}
			}
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

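/*
 * Iteration sketch: an entry with per_se_instance == 1 and num_instance ==
 * 16 (e.g. "TA_FS_DFIFO") is read for instance ids 0..15 on every shader
 * engine, while a global entry (per_se_instance == 0) is skipped whenever
 * se_id != 0, so it is read exactly once per instance id it supports.
 */
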
static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	struct ras_common_if *ras_if = adev->gfx.ras_if;
	struct ras_dispatch_if ih_data = {
		.entry = entry,
	};

	if (!ras_if)
		return 0;

	ih_data.head = *ras_if;

	DRM_ERROR("CP ECC ERROR IRQ\n");
	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	return 0;
}

static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		     * the first COND_EXEC jumps to the place just
		     * prior to this double SWITCH_BUFFER
		     */
		5 + /* COND_EXEC */
		7 + /* HDP_flush */
		4 + /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_tmz = gfx_v9_0_ring_emit_tmz,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v9_0_ring_set_priority_compute,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = gfx_v9_0_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
		adev->gds.gds_size = 0x1000;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case CHIP_ARCTURUS:
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

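/*
 * Example (hypothetical config): with max_cu_per_sh == 10 the created mask
 * is 0x3ff; if the combined INACTIVE_CUS field reads 0x300 (top two CUs
 * fused off), the returned active bitmap is (~0x300) & 0x3ff == 0x0ff,
 * i.e. CUs 0-7 are active.
 */
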
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)
		return -EINVAL;

	/*
	 * 16 comes from the 4*4 bitmap array size; it covers all gfx9 ASICs.
	 */
	if (adev->gfx.config.max_shader_engines *
	    adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info
			 * structure is a 4x4 array, which is usually
			 * sufficient for Vega ASICs with their 4*2 SE/SH
			 * layout. For Arcturus the SE/SH layout changed to
			 * 8*1. To minimize the impact, we keep it compatible
			 * with the current bitmap array as follows:
			 *    SE4,SH0 --> bitmap[0][1]
			 *    SE5,SH0 --> bitmap[1][1]
			 *    SE6,SH0 --> bitmap[2][1]
			 *    SE7,SH0 --> bitmap[3][1]
			 */
			cu_info->bitmap[i % 4][j + i / 4] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

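/*
 * Index-mapping check for the 8*1 case described above: SE6/SH0 means
 * i == 6, j == 0, so the CU bitmap is stored at
 * bitmap[6 % 4][0 + 6 / 4] == bitmap[2][1], matching the comment's table.
 */
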
const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};