/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

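/*
 * GCEA_PROBE_MAP is not provided by the gc_9_0 register headers included
 * above, so its offset is defined locally (the value is presumed to match
 * the hardware spec); it is used by the Renoir golden settings below.
 */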
#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");

#define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0

#define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1

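/*
 * Fine-grained GFX RAS sub-block indices shared with the RAS trusted
 * application (TA).  The _INDEX_START/_INDEX_END markers bracket groups
 * of related sub-blocks so a whole block can be addressed as a range.
 */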
enum ta_ras_gfx_subblock {
        /* CPC */
        TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
        TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
        TA_RAS_BLOCK__GFX_CPC_UCODE,
        TA_RAS_BLOCK__GFX_DC_STATE_ME1,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
        TA_RAS_BLOCK__GFX_DC_STATE_ME2,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        /* CPF */
        TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
        TA_RAS_BLOCK__GFX_CPF_TAG,
        TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
        /* CPG */
        TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
        TA_RAS_BLOCK__GFX_CPG_TAG,
        TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
        /* GDS */
        TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        /* SPI */
        TA_RAS_BLOCK__GFX_SPI_SR_MEM,
        /* SQ */
        TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_LDS_D,
        TA_RAS_BLOCK__GFX_SQ_LDS_I,
        TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP */
        TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
        /* SQC (3 ranges) */
        TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        /* SQC range 0 */
        TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
                TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        /* SQC range 1 */
        TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        /* SQC range 2 */
        TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
        /* TA */
        TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
        TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        /* TCA */
        TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        /* TCC (5 sub-ranges) */
        TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        /* TCC range 0 */
        TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
        TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        /* TCC range 1 */
        TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
                TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        /* TCC range 2 */
        TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
        TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
        TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
                TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        /* TCC range 3 */
        TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
                TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        /* TCC range 4 */
        TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
                TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
                TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
        /* TCI */
        TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
        /* TCP */
        TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
        TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
        TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
        TA_RAS_BLOCK__GFX_TCP_DB_RAM,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        /* TD */
        TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
        TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        /* EA (3 sub-ranges) */
        TA_RAS_BLOCK__GFX_EA_INDEX_START,
        /* EA range 0 */
        TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        /* EA range 1 */
        TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        /* EA range 2 */
        TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
        /* UTC VM L2 bank */
        TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
        /* UTC VM walker */
        TA_RAS_BLOCK__UTC_VML2_WALKER,
        /* UTC ATC L2 2MB cache */
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
        /* UTC ATC L2 4KB cache */
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
        TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
        unsigned char *name;
        int ta_subblock;
        int hw_supported_error_type;
        int sw_supported_error_type;
};

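/*
 * Packs the per-sub-block error-type capabilities for the table below:
 * a..d build hw_supported_error_type and e..h build
 * sw_supported_error_type.  The bit positions appear to track enum
 * amdgpu_ras_error_type (parity, single-correctable, multi-uncorrectable,
 * poison); the permuted e..h ordering is preserved from the original
 * macro as-is.
 */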
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
        [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
                #subblock,                                                     \
                TA_RAS_BLOCK__##subblock,                                      \
                ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
                (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
        }

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

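/*
 * Golden register settings.  Each SOC15_REG_GOLDEN_VALUE(ip, inst, reg,
 * and_mask, or_value) entry is applied read-modify-write by
 * soc15_program_register_sequence(): the bits selected by the AND mask
 * are cleared and the OR value is then written into them.
 */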
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

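/*
 * Registers expected to be written through the RLC-guarded (RLCG)
 * interface when running as an SR-IOV guest; presumably consulted by
 * common code to route such writes through gfx_v9_0_sriov_wreg() below.
 */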
static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
        {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
        {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

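/*
 * Offsets of the eight RLC_SRM_INDEX_CNTL_ADDR/_DATA register instances
 * relative to instance 0, so both register files can be walked with a
 * single loop index when programming the RLC save/restore list.
 */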
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

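/*
 * RLCG indirect register write: the value is placed in SCRATCH_REG0 and
 * the target offset, with bit 31 set as a "request pending" flag, in
 * SCRATCH_REG1; RLC_SPARE_INT then kicks the RLC firmware, which clears
 * bit 31 of SCRATCH_REG1 once the write has been performed.
 * GRBM_GFX_CNTL and GRBM_GFX_INDEX are instead mirrored to
 * SCRATCH_REG2/SCRATCH_REG3 and written directly.
 */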
static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
{
        static void *scratch_reg0;
        static void *scratch_reg1;
        static void *scratch_reg2;
        static void *scratch_reg3;
        static void *spare_int;
        static uint32_t grbm_cntl;
        static uint32_t grbm_idx;

        scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
        scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
        scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
        scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
        spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;

        grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
        grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;

        if (amdgpu_sriov_runtime(adev)) {
                pr_err("RLCG register write should not be called during SR-IOV runtime\n");
                return;
        }

        if (offset == grbm_cntl || offset == grbm_idx) {
                if (offset == grbm_cntl)
                        writel(v, scratch_reg2);
                else if (offset == grbm_idx)
                        writel(v, scratch_reg3);

                writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
        } else {
                uint32_t i = 0;
                uint32_t retries = 50000;

                writel(v, scratch_reg0);
                writel(offset | 0x80000000, scratch_reg1);
                writel(1, spare_int);
                for (i = 0; i < retries; i++) {
                        u32 tmp;

                        tmp = readl(scratch_reg1);
                        if (!(tmp & 0x80000000))
                                break;

                        udelay(10);
                }
                if (i >= retries)
                        pr_err("timeout: rlcg program reg 0x%05x failed!\n", offset);
        }
}

static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset,
                               u32 v, u32 acc_flags, u32 hwip)
{
        if ((acc_flags & AMDGPU_REGS_RLC) &&
            amdgpu_sriov_fullaccess(adev)) {
                gfx_v9_0_rlcg_w(adev, offset, v, acc_flags);

                return;
        }

        if (acc_flags & AMDGPU_REGS_NO_KIQ)
                WREG32_NO_KIQ(offset, v);
        else
                WREG32(offset, v);
}

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
                                          void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
                                     void *inject_if);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);

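/*
 * KIQ (kernel interface queue) PM4 packet builders.  Each helper emits
 * exactly one packet onto the KIQ ring; the dword counts must match the
 * *_size fields of gfx_v9_0_kiq_pm4_funcs further below.
 */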
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
                                uint64_t queue_mask)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring,
                PACKET3_SET_RESOURCES_VMID_MASK(0) |
                /* vmid_mask: 0, queue_type: 0 (KIQ) */
                PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
        amdgpu_ring_write(kiq_ring,
                        lower_32_bits(queue_mask));     /* queue mask lo */
        amdgpu_ring_write(kiq_ring,
                        upper_32_bits(queue_mask));     /* queue mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}

static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
                                 struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = kiq_ring->adev;
        uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
        uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
        /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
        amdgpu_ring_write(kiq_ring,
                         PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
                         PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
                         PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                         PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                         PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
                         /* queue_type: normal compute queue */
                         PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
                         /* alloc format: all_on_one_pipe */
                         PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
                         PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
                         /* num_queues: must be 1 */
                         PACKET3_MAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
        amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   enum amdgpu_unmap_queues_action action,
                                   u64 gpu_addr, u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
        amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
                          PACKET3_UNMAP_QUEUES_ACTION(action) |
                          PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
                          PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
                          PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

        if (action == PREEMPT_QUEUES_NO_UNMAP) {
                amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, seq);
        } else {
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
        }
}

static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   u64 addr,
                                   u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
        amdgpu_ring_write(kiq_ring,
                          PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
                          PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
                          PACKET3_QUERY_STATUS_COMMAND(2));
        /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
        amdgpu_ring_write(kiq_ring,
                        PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
                        PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
        amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
        amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
                                uint16_t pasid, uint32_t flush_type,
                                bool all_hub)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
                        PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
                        PACKET3_INVALIDATE_TLBS_PASID(pasid) |
                        PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

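/*
 * The *_size fields below are the lengths in dwords of the packets
 * emitted by the builders above (PM4 header plus payload), e.g.
 * MAP_QUEUES is 1 + 6 = 7 dwords.
 */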
941 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
942         .kiq_set_resources = gfx_v9_0_kiq_set_resources,
943         .kiq_map_queues = gfx_v9_0_kiq_map_queues,
944         .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
945         .kiq_query_status = gfx_v9_0_kiq_query_status,
946         .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
947         .set_resources_size = 8,
948         .map_queues_size = 7,
949         .unmap_queues_size = 6,
950         .query_status_size = 7,
951         .invalidate_tlbs_size = 2,
952 };
953
954 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
955 {
956         adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
957 }
958
959 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
960 {
961         switch (adev->ip_versions[GC_HWIP][0]) {
962         case IP_VERSION(9, 0, 1):
963                 soc15_program_register_sequence(adev,
964                                                 golden_settings_gc_9_0,
965                                                 ARRAY_SIZE(golden_settings_gc_9_0));
966                 soc15_program_register_sequence(adev,
967                                                 golden_settings_gc_9_0_vg10,
968                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
969                 break;
970         case IP_VERSION(9, 2, 1):
971                 soc15_program_register_sequence(adev,
972                                                 golden_settings_gc_9_2_1,
973                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
974                 soc15_program_register_sequence(adev,
975                                                 golden_settings_gc_9_2_1_vg12,
976                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
977                 break;
978         case IP_VERSION(9, 4, 0):
979                 soc15_program_register_sequence(adev,
980                                                 golden_settings_gc_9_0,
981                                                 ARRAY_SIZE(golden_settings_gc_9_0));
982                 soc15_program_register_sequence(adev,
983                                                 golden_settings_gc_9_0_vg20,
984                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
985                 break;
986         case IP_VERSION(9, 4, 1):
987                 soc15_program_register_sequence(adev,
988                                                 golden_settings_gc_9_4_1_arct,
989                                                 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
990                 break;
991         case IP_VERSION(9, 2, 2):
992         case IP_VERSION(9, 1, 0):
993                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
994                                                 ARRAY_SIZE(golden_settings_gc_9_1));
995                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
996                         soc15_program_register_sequence(adev,
997                                                         golden_settings_gc_9_1_rv2,
998                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
999                 else
1000                         soc15_program_register_sequence(adev,
1001                                                         golden_settings_gc_9_1_rv1,
1002                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1003                 break;
1004         case IP_VERSION(9, 3, 0):
1005                 soc15_program_register_sequence(adev,
1006                                                 golden_settings_gc_9_1_rn,
1007                                                 ARRAY_SIZE(golden_settings_gc_9_1_rn));
1008                 return; /* for renoir, don't need the common golden settings */
1009         case IP_VERSION(9, 4, 2):
1010                 gfx_v9_4_2_init_golden_registers(adev,
1011                                                  adev->smuio.funcs->get_die_id(adev));
1012                 break;
1013         default:
1014                 break;
1015         }
1016
1017         if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1018             (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
1019                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1020                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1021 }
1022
1023 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
1024 {
1025         adev->gfx.scratch.num_reg = 8;
1026         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1027         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
1028 }
1029
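/*
 * Emit a WRITE_DATA packet that writes @val to register @reg.
 * Packet layout (5 dwords): header; control (engine select,
 * DST_SEL(0) = register, optional write confirm); register offset;
 * upper address dword (0 for register writes); value.
 */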
1030 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1031                                        bool wc, uint32_t reg, uint32_t val)
1032 {
1033         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1034         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1035                                 WRITE_DATA_DST_SEL(0) |
1036                                 (wc ? WR_CONFIRM : 0));
1037         amdgpu_ring_write(ring, reg);
1038         amdgpu_ring_write(ring, 0);
1039         amdgpu_ring_write(ring, val);
1040 }
1041
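/*
 * Emit a WAIT_REG_MEM packet: stall until the dword at a register
 * (mem_space = 0) or a dword-aligned memory address (mem_space = 1)
 * satisfies (value & mask) == ref.  WAIT_REG_MEM_FUNCTION(3) selects
 * the "equal" comparison; @inv is the poll interval.
 */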
1042 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1043                                   int mem_space, int opt, uint32_t addr0,
1044                                   uint32_t addr1, uint32_t ref, uint32_t mask,
1045                                   uint32_t inv)
1046 {
1047         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1048         amdgpu_ring_write(ring,
1049                                  /* memory (1) or register (0) */
1050                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1051                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
1052                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1053                                  WAIT_REG_MEM_ENGINE(eng_sel)));
1054
1055         if (mem_space)
1056                 BUG_ON(addr0 & 0x3); /* Dword align */
1057         amdgpu_ring_write(ring, addr0);
1058         amdgpu_ring_write(ring, addr1);
1059         amdgpu_ring_write(ring, ref);
1060         amdgpu_ring_write(ring, mask);
1061         amdgpu_ring_write(ring, inv); /* poll interval */
1062 }
1063
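/*
 * Basic ring sanity test: seed a scratch register with 0xCAFEDEAD from
 * the CPU, ask the ring to overwrite it with 0xDEADBEEF via
 * SET_UCONFIG_REG, then poll until the new value appears or the timeout
 * expires.
 */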
1064 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1065 {
1066         struct amdgpu_device *adev = ring->adev;
1067         uint32_t scratch;
1068         uint32_t tmp = 0;
1069         unsigned i;
1070         int r;
1071
1072         r = amdgpu_gfx_scratch_get(adev, &scratch);
1073         if (r)
1074                 return r;
1075
1076         WREG32(scratch, 0xCAFEDEAD);
1077         r = amdgpu_ring_alloc(ring, 3);
1078         if (r)
1079                 goto error_free_scratch;
1080
1081         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1082         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1083         amdgpu_ring_write(ring, 0xDEADBEEF);
1084         amdgpu_ring_commit(ring);
1085
1086         for (i = 0; i < adev->usec_timeout; i++) {
1087                 tmp = RREG32(scratch);
1088                 if (tmp == 0xDEADBEEF)
1089                         break;
1090                 udelay(1);
1091         }
1092
1093         if (i >= adev->usec_timeout)
1094                 r = -ETIMEDOUT;
1095
1096 error_free_scratch:
1097         amdgpu_gfx_scratch_free(adev, scratch);
1098         return r;
1099 }
1100
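/*
 * Indirect buffer test: build a small IB whose WRITE_DATA packet stores
 * 0xDEADBEEF to a writeback slot, schedule it, wait on its fence and
 * verify that memory was updated.
 */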
1101 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1102 {
1103         struct amdgpu_device *adev = ring->adev;
1104         struct amdgpu_ib ib;
1105         struct dma_fence *f = NULL;
1106
1107         unsigned index;
1108         uint64_t gpu_addr;
1109         uint32_t tmp;
1110         long r;
1111
1112         r = amdgpu_device_wb_get(adev, &index);
1113         if (r)
1114                 return r;
1115
1116         gpu_addr = adev->wb.gpu_addr + (index * 4);
1117         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1118         memset(&ib, 0, sizeof(ib));
1119         r = amdgpu_ib_get(adev, NULL, 16,
1120                           AMDGPU_IB_POOL_DIRECT, &ib);
1121         if (r)
1122                 goto err1;
1123
1124         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1125         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1126         ib.ptr[2] = lower_32_bits(gpu_addr);
1127         ib.ptr[3] = upper_32_bits(gpu_addr);
1128         ib.ptr[4] = 0xDEADBEEF;
1129         ib.length_dw = 5;
1130
1131         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1132         if (r)
1133                 goto err2;
1134
1135         r = dma_fence_wait_timeout(f, false, timeout);
1136         if (r == 0) {
1137                 r = -ETIMEDOUT;
1138                 goto err2;
1139         } else if (r < 0) {
1140                 goto err2;
1141         }
1142
1143         tmp = adev->wb.wb[index];
1144         if (tmp == 0xDEADBEEF)
1145                 r = 0;
1146         else
1147                 r = -EINVAL;
1148
1149 err2:
1150         amdgpu_ib_free(adev, &ib, NULL);
1151         dma_fence_put(f);
1152 err1:
1153         amdgpu_device_wb_free(adev, index);
1154         return r;
1155 }
1156
1157
1158 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1159 {
1160         release_firmware(adev->gfx.pfp_fw);
1161         adev->gfx.pfp_fw = NULL;
1162         release_firmware(adev->gfx.me_fw);
1163         adev->gfx.me_fw = NULL;
1164         release_firmware(adev->gfx.ce_fw);
1165         adev->gfx.ce_fw = NULL;
1166         release_firmware(adev->gfx.rlc_fw);
1167         adev->gfx.rlc_fw = NULL;
1168         release_firmware(adev->gfx.mec_fw);
1169         adev->gfx.mec_fw = NULL;
1170         release_firmware(adev->gfx.mec2_fw);
1171         adev->gfx.mec2_fw = NULL;
1172
1173         kfree(adev->gfx.rlc.register_list_format);
1174 }
1175
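/*
 * RLC v2.1 firmware images carry three additional save/restore lists
 * (CNTL, GPM and SRM).  Record their versions, sizes and offsets from
 * the extended header.
 */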
1176 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1177 {
1178         const struct rlc_firmware_header_v2_1 *rlc_hdr;
1179
1180         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1181         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1182         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1183         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1184         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1185         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1186         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1187         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1188         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1189         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1190         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1191         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1192         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1193         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1194                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1195 }
1196
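/*
 * Record whether the loaded ME/MEC firmware is new enough for the CP to
 * handle the combined register write-then-wait operation; without it the
 * driver falls back to separate write and wait packets, and very old CP
 * firmware gets a one-time warning.
 */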
1197 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1198 {
1199         adev->gfx.me_fw_write_wait = false;
1200         adev->gfx.mec_fw_write_wait = false;
1201
1202         if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1203             ((adev->gfx.mec_fw_version < 0x000001a5) ||
1204             (adev->gfx.mec_feature_version < 46) ||
1205             (adev->gfx.pfp_fw_version < 0x000000b7) ||
1206             (adev->gfx.pfp_feature_version < 46)))
1207                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1208
1209         switch (adev->ip_versions[GC_HWIP][0]) {
1210         case IP_VERSION(9, 0, 1):
1211                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1212                     (adev->gfx.me_feature_version >= 42) &&
1213                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1214                     (adev->gfx.pfp_feature_version >= 42))
1215                         adev->gfx.me_fw_write_wait = true;
1216
1217                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1218                     (adev->gfx.mec_feature_version >= 42))
1219                         adev->gfx.mec_fw_write_wait = true;
1220                 break;
1221         case IP_VERSION(9, 2, 1):
1222                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1223                     (adev->gfx.me_feature_version >= 44) &&
1224                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1225                     (adev->gfx.pfp_feature_version >= 44))
1226                         adev->gfx.me_fw_write_wait = true;
1227
1228                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1229                     (adev->gfx.mec_feature_version >= 44))
1230                         adev->gfx.mec_fw_write_wait = true;
1231                 break;
1232         case IP_VERSION(9, 4, 0):
1233                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1234                     (adev->gfx.me_feature_version >= 44) &&
1235                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1236                     (adev->gfx.pfp_feature_version >= 44))
1237                         adev->gfx.me_fw_write_wait = true;
1238
1239                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1240                     (adev->gfx.mec_feature_version >= 44))
1241                         adev->gfx.mec_fw_write_wait = true;
1242                 break;
1243         case IP_VERSION(9, 1, 0):
1244         case IP_VERSION(9, 2, 2):
1245                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1246                     (adev->gfx.me_feature_version >= 42) &&
1247                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1248                     (adev->gfx.pfp_feature_version >= 42))
1249                         adev->gfx.me_fw_write_wait = true;
1250
1251                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1252                     (adev->gfx.mec_feature_version >= 42))
1253                         adev->gfx.mec_fw_write_wait = true;
1254                 break;
1255         default:
1256                 adev->gfx.me_fw_write_wait = true;
1257                 adev->gfx.mec_fw_write_wait = true;
1258                 break;
1259         }
1260 }
1261
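/*
 * PCI ID plus revision tuples for boards where GFXOFF is known to be
 * unstable; matching devices have GFXOFF masked out of pp_feature in
 * gfx_v9_0_check_if_need_gfxoff() below.
 */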
1262 struct amdgpu_gfxoff_quirk {
1263         u16 chip_vendor;
1264         u16 chip_device;
1265         u16 subsys_vendor;
1266         u16 subsys_device;
1267         u8 revision;
1268 };
1269
1270 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1271         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1272         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1273         /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1274         { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1275         /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1276         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1277         { 0, 0, 0, 0, 0 },
1278 };
1279
1280 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1281 {
1282         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1283
1284         while (p && p->chip_device != 0) {
1285                 if (pdev->vendor == p->chip_vendor &&
1286                     pdev->device == p->chip_device &&
1287                     pdev->subsystem_vendor == p->subsys_vendor &&
1288                     pdev->subsystem_device == p->subsys_device &&
1289                     pdev->revision == p->revision) {
1290                         return true;
1291                 }
1292                 ++p;
1293         }
1294         return false;
1295 }
1296
1297 static bool is_raven_kicker(struct amdgpu_device *adev)
1298 {
1299         if (adev->pm.fw_version >= 0x41e2b)
1300                 return true;
1301         else
1302                 return false;
1303 }
1304
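/*
 * On GC 9.3.0 (Renoir), ME firmware 0xa5 with feature version 52 or
 * newer expects an enlarged gfx doorbell range.
 */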
1305 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1306 {
1307         if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1308             (adev->gfx.me_fw_version >= 0x000000a5) &&
1309             (adev->gfx.me_feature_version >= 52))
1310                 return true;
1311         else
1312                 return false;
1313 }
1314
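/*
 * Apply the GFXOFF quirks and firmware requirements: disable GFXOFF on
 * quirked boards and on early Raven parts whose RLC firmware lacks
 * support; where GFXOFF stays enabled, add the dependent CP/RLC
 * powergating flags.
 */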
1315 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1316 {
1317         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1318                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1319
1320         switch (adev->ip_versions[GC_HWIP][0]) {
1321         case IP_VERSION(9, 0, 1):
1322         case IP_VERSION(9, 2, 1):
1323         case IP_VERSION(9, 4, 0):
1324                 break;
1325         case IP_VERSION(9, 2, 2):
1326         case IP_VERSION(9, 1, 0):
1327                 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1328                       (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1329                     ((!is_raven_kicker(adev) &&
1330                       adev->gfx.rlc_fw_version < 531) ||
1331                      (adev->gfx.rlc_feature_version < 1) ||
1332                      !adev->gfx.rlc.is_rlc_v2_1))
1333                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1334
1335                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1336                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1337                                 AMD_PG_SUPPORT_CP |
1338                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1339                 break;
1340         case IP_VERSION(9, 3, 0):
1341                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1342                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1343                                 AMD_PG_SUPPORT_CP |
1344                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1345                 break;
1346         default:
1347                 break;
1348         }
1349 }
1350
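/*
 * Fetch and validate the CP gfx microcode (PFP, ME and CE), record the
 * ucode/feature versions, and register the images with the PSP loader
 * when front-door loading is in use.
 */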
1351 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1352                                           const char *chip_name)
1353 {
1354         char fw_name[30];
1355         int err;
1356         struct amdgpu_firmware_info *info = NULL;
1357         const struct common_firmware_header *header = NULL;
1358         const struct gfx_firmware_header_v1_0 *cp_hdr;
1359
1360         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1361         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1362         if (err)
1363                 goto out;
1364         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1365         if (err)
1366                 goto out;
1367         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1368         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1369         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1370
1371         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1372         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1373         if (err)
1374                 goto out;
1375         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1376         if (err)
1377                 goto out;
1378         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1379         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1380         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1381
1382         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1383         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1384         if (err)
1385                 goto out;
1386         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1387         if (err)
1388                 goto out;
1389         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1390         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1391         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1392
1393         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1394                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1395                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1396                 info->fw = adev->gfx.pfp_fw;
1397                 header = (const struct common_firmware_header *)info->fw->data;
1398                 adev->firmware.fw_size +=
1399                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1400
1401                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1402                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1403                 info->fw = adev->gfx.me_fw;
1404                 header = (const struct common_firmware_header *)info->fw->data;
1405                 adev->firmware.fw_size +=
1406                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1407
1408                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1409                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1410                 info->fw = adev->gfx.ce_fw;
1411                 header = (const struct common_firmware_header *)info->fw->data;
1412                 adev->firmware.fw_size +=
1413                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1414         }
1415
1416 out:
1417         if (err) {
1418                 dev_err(adev->dev,
1419                         "gfx9: Failed to load firmware \"%s\"\n",
1420                         fw_name);
1421                 release_firmware(adev->gfx.pfp_fw);
1422                 adev->gfx.pfp_fw = NULL;
1423                 release_firmware(adev->gfx.me_fw);
1424                 adev->gfx.me_fw = NULL;
1425                 release_firmware(adev->gfx.ce_fw);
1426                 adev->gfx.ce_fw = NULL;
1427         }
1428         return err;
1429 }
1430
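/*
 * Fetch and parse the RLC microcode.  Picasso AM4 and "kicker" Raven
 * boards need dedicated images; v2.1 headers additionally provide the
 * save/restore lists parsed by gfx_v9_0_init_rlc_ext_microcode().
 */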
1431 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1432                                           const char *chip_name)
1433 {
1434         char fw_name[30];
1435         int err;
1436         struct amdgpu_firmware_info *info = NULL;
1437         const struct common_firmware_header *header = NULL;
1438         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1439         unsigned int *tmp = NULL;
1440         unsigned int i = 0;
1441         uint16_t version_major;
1442         uint16_t version_minor;
1443         uint32_t smu_version;
1444
1445         /*
1446          * For Picasso on AM4-socket boards, use picasso_rlc_am4.bin
1447          * instead of picasso_rlc.bin.
1448          * Detection method: a PCO AM4 part has
1449          * revision >= 0xC8 && revision <= 0xCF, or
1450          * revision >= 0xD8 && revision <= 0xDF;
1451          * any other revision is PCO FP5.
1452          */
1453         if (!strcmp(chip_name, "picasso") &&
1454                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1455                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1456                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1457         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1458                 (smu_version >= 0x41e2b))
1459                 /*
1460                  * On APUs the SMC is loaded by the SBIOS, so the SMU version can be read directly.
1461                  */
1462                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1463         else
1464                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1465         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1466         if (err)
1467                 goto out;
1468         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
             if (err)
                     goto out;
1469         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1470
1471         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1472         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1473         if (version_major == 2 && version_minor == 1)
1474                 adev->gfx.rlc.is_rlc_v2_1 = true;
1475
1476         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1477         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1478         adev->gfx.rlc.save_and_restore_offset =
1479                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1480         adev->gfx.rlc.clear_state_descriptor_offset =
1481                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1482         adev->gfx.rlc.avail_scratch_ram_locations =
1483                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1484         adev->gfx.rlc.reg_restore_list_size =
1485                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1486         adev->gfx.rlc.reg_list_format_start =
1487                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1488         adev->gfx.rlc.reg_list_format_separate_start =
1489                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1490         adev->gfx.rlc.starting_offsets_start =
1491                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1492         adev->gfx.rlc.reg_list_format_size_bytes =
1493                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1494         adev->gfx.rlc.reg_list_size_bytes =
1495                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1496         adev->gfx.rlc.register_list_format =
1497                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1498                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1499         if (!adev->gfx.rlc.register_list_format) {
1500                 err = -ENOMEM;
1501                 goto out;
1502         }
1503
1504         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1505                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1506         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1507                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1508
1509         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1510
1511         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1512                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1513         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1514                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1515
1516         if (adev->gfx.rlc.is_rlc_v2_1)
1517                 gfx_v9_0_init_rlc_ext_microcode(adev);
1518
1519         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1520                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1521                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1522                 info->fw = adev->gfx.rlc_fw;
1523                 header = (const struct common_firmware_header *)info->fw->data;
1524                 adev->firmware.fw_size +=
1525                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1526
1527                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1528                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1529                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1530                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1531                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1532                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1533                         info->fw = adev->gfx.rlc_fw;
1534                         adev->firmware.fw_size +=
1535                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1536
1537                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1538                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1539                         info->fw = adev->gfx.rlc_fw;
1540                         adev->firmware.fw_size +=
1541                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1542
1543                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1544                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1545                         info->fw = adev->gfx.rlc_fw;
1546                         adev->firmware.fw_size +=
1547                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1548                 }
1549         }
1550
1551 out:
1552         if (err) {
1553                 dev_err(adev->dev,
1554                         "gfx9: Failed to load firmware \"%s\"\n",
1555                         fw_name);
1556                 release_firmware(adev->gfx.rlc_fw);
1557                 adev->gfx.rlc_fw = NULL;
1558         }
1559         return err;
1560 }
1561
1562 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1563 {
1564         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1565             adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1566             adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1567                 return false;
1568
1569         return true;
1570 }
1571
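/*
 * Fetch and validate the compute microcode.  ASICs without a separate
 * MEC2 image (see gfx_v9_0_load_mec2_fw_bin_support()) reuse the MEC1
 * versions; a missing mec2.bin is tolerated on the others.
 */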
1572 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1573                                           const char *chip_name)
1574 {
1575         char fw_name[30];
1576         int err;
1577         struct amdgpu_firmware_info *info = NULL;
1578         const struct common_firmware_header *header = NULL;
1579         const struct gfx_firmware_header_v1_0 *cp_hdr;
1580
1581         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1582         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1583         if (err)
1584                 goto out;
1585         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1586         if (err)
1587                 goto out;
1588         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1589         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1590         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1591
1592
1593         if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1594                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1595                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1596                 if (!err) {
1597                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1598                         if (err)
1599                                 goto out;
1600                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1601                                         adev->gfx.mec2_fw->data;
1602                         adev->gfx.mec2_fw_version =
1603                                         le32_to_cpu(cp_hdr->header.ucode_version);
1604                         adev->gfx.mec2_feature_version =
1605                                         le32_to_cpu(cp_hdr->ucode_feature_version);
1606                 } else {
1607                         err = 0;
1608                         adev->gfx.mec2_fw = NULL;
1609                 }
1610         } else {
1611                 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1612                 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1613         }
1614
1615         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1616                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1617                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1618                 info->fw = adev->gfx.mec_fw;
1619                 header = (const struct common_firmware_header *)info->fw->data;
1620                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1621                 adev->firmware.fw_size +=
1622                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1623
1624                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1625                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1626                 info->fw = adev->gfx.mec_fw;
1627                 adev->firmware.fw_size +=
1628                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1629
1630                 if (adev->gfx.mec2_fw) {
1631                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1632                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1633                         info->fw = adev->gfx.mec2_fw;
1634                         header = (const struct common_firmware_header *)info->fw->data;
1635                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1636                         adev->firmware.fw_size +=
1637                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1638
1639                         /* TODO: Determine if MEC2 JT FW loading can be removed
1640                          * for all GFX v9 ASICs and above */
1641                         if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1642                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1643                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1644                                 info->fw = adev->gfx.mec2_fw;
1645                                 adev->firmware.fw_size +=
1646                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1647                                         PAGE_SIZE);
1648                         }
1649                 }
1650         }
1651
1652 out:
1653         gfx_v9_0_check_if_need_gfxoff(adev);
1654         gfx_v9_0_check_fw_write_wait(adev);
1655         if (err) {
1656                 dev_err(adev->dev,
1657                         "gfx9: Failed to load firmware \"%s\"\n",
1658                         fw_name);
1659                 release_firmware(adev->gfx.mec_fw);
1660                 adev->gfx.mec_fw = NULL;
1661                 release_firmware(adev->gfx.mec2_fw);
1662                 adev->gfx.mec2_fw = NULL;
1663         }
1664         return err;
1665 }
1666
1667 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1668 {
1669         const char *chip_name;
1670         int r;
1671
1672         DRM_DEBUG("\n");
1673
1674         switch (adev->ip_versions[GC_HWIP][0]) {
1675         case IP_VERSION(9, 0, 1):
1676                 chip_name = "vega10";
1677                 break;
1678         case IP_VERSION(9, 2, 1):
1679                 chip_name = "vega12";
1680                 break;
1681         case IP_VERSION(9, 4, 0):
1682                 chip_name = "vega20";
1683                 break;
1684         case IP_VERSION(9, 2, 2):
1685         case IP_VERSION(9, 1, 0):
1686                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1687                         chip_name = "raven2";
1688                 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1689                         chip_name = "picasso";
1690                 else
1691                         chip_name = "raven";
1692                 break;
1693         case IP_VERSION(9, 4, 1):
1694                 chip_name = "arcturus";
1695                 break;
1696         case IP_VERSION(9, 3, 0):
1697                 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1698                         chip_name = "renoir";
1699                 else
1700                         chip_name = "green_sardine";
1701                 break;
1702         case IP_VERSION(9, 4, 2):
1703                 chip_name = "aldebaran";
1704                 break;
1705         default:
1706                 BUG();
1707         }
1708
1709         /* No CPG in Arcturus */
1710         if (adev->gfx.num_gfx_rings) {
1711                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1712                 if (r)
1713                         return r;
1714         }
1715
1716         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1717         if (r)
1718                 return r;
1719
1720         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1721         if (r)
1722                 return r;
1723
1724         return r;
1725 }
1726
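/*
 * Size of the clear-state buffer in dwords: begin/end preamble (2 + 2),
 * context control (3), clear state (2), plus 2 + reg_count dwords for
 * every SECT_CONTEXT extent; gfx_v9_0_get_csb_buffer() below emits
 * exactly this layout.
 */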
1727 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1728 {
1729         u32 count = 0;
1730         const struct cs_section_def *sect = NULL;
1731         const struct cs_extent_def *ext = NULL;
1732
1733         /* begin clear state */
1734         count += 2;
1735         /* context control state */
1736         count += 3;
1737
1738         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1739                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1740                         if (sect->id == SECT_CONTEXT)
1741                                 count += 2 + ext->reg_count;
1742                         else
1743                                 return 0;
1744                 }
1745         }
1746
1747         /* end clear state */
1748         count += 2;
1749         /* clear state */
1750         count += 2;
1751
1752         return count;
1753 }
1754
1755 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1756                                     volatile u32 *buffer)
1757 {
1758         u32 count = 0, i;
1759         const struct cs_section_def *sect = NULL;
1760         const struct cs_extent_def *ext = NULL;
1761
1762         if (adev->gfx.rlc.cs_data == NULL)
1763                 return;
1764         if (buffer == NULL)
1765                 return;
1766
1767         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1768         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1769
1770         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1771         buffer[count++] = cpu_to_le32(0x80000000);
1772         buffer[count++] = cpu_to_le32(0x80000000);
1773
1774         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1775                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1776                         if (sect->id == SECT_CONTEXT) {
1777                                 buffer[count++] =
1778                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1779                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1780                                                 PACKET3_SET_CONTEXT_REG_START);
1781                                 for (i = 0; i < ext->reg_count; i++)
1782                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1783                         } else {
1784                                 return;
1785                         }
1786                 }
1787         }
1788
1789         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1790         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1791
1792         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1793         buffer[count++] = cpu_to_le32(0);
1794 }
1795
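/*
 * Build the per-SE/SH always-on CU masks: 4 CUs on APUs, 8 on Vega12,
 * 12 otherwise.  The first pg_always_on_cu_num CUs are also written to
 * RLC_PG_ALWAYS_ON_CU_MASK for powergating.
 */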
1796 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1797 {
1798         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1799         uint32_t pg_always_on_cu_num = 2;
1800         uint32_t always_on_cu_num;
1801         uint32_t i, j, k;
1802         uint32_t mask, cu_bitmap, counter;
1803
1804         if (adev->flags & AMD_IS_APU)
1805                 always_on_cu_num = 4;
1806         else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1807                 always_on_cu_num = 8;
1808         else
1809                 always_on_cu_num = 12;
1810
1811         mutex_lock(&adev->grbm_idx_mutex);
1812         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1813                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1814                         mask = 1;
1815                         cu_bitmap = 0;
1816                         counter = 0;
1817                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1818
1819                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1820                                 if (cu_info->bitmap[i][j] & mask) {
1821                                         if (counter == pg_always_on_cu_num)
1822                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1823                                         if (counter < always_on_cu_num)
1824                                                 cu_bitmap |= mask;
1825                                         else
1826                                                 break;
1827                                         counter++;
1828                                 }
1829                                 mask <<= 1;
1830                         }
1831
1832                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1833                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1834                 }
1835         }
1836         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1837         mutex_unlock(&adev->grbm_idx_mutex);
1838 }
1839
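/*
 * Configure Load Balancing Per Watt (LBPW) for Raven: threshold
 * configs, counters and RLC_LB_CNTL, then the always-on CU mask.
 * gfx_v9_4_init_lbpw() below is the GC 9.4.0 (Vega20) variant with
 * different thresholds.
 */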
1840 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1841 {
1842         uint32_t data;
1843
1844         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1845         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1846         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1847         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1848         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1849
1850         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1851         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1852
1853         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1854         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1855
1856         mutex_lock(&adev->grbm_idx_mutex);
1857         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1858         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1859         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1860
1861         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1862         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1863         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1864         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1865         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1866
1867         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1868         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1869         data &= 0x0000FFFF;
1870         data |= 0x00C00000;
1871         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1872
1873         /*
1874          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1875          * programmed in gfx_v9_0_init_always_on_cu_mask()
1876          */
1877
1878         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1879          * but is used here as part of the RLC_LB_CNTL configuration */
1880         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1881         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1882         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1883         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1884         mutex_unlock(&adev->grbm_idx_mutex);
1885
1886         gfx_v9_0_init_always_on_cu_mask(adev);
1887 }
1888
1889 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1890 {
1891         uint32_t data;
1892
1893         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1894         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1895         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1896         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1897         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1898
1899         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1900         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1901
1902         /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1903         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1904
1905         mutex_lock(&adev->grbm_idx_mutex);
1906         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1907         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1908         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1909
1910         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1911         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1912         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1913         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1914         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1915
1916         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1917         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1918         data &= 0x0000FFFF;
1919         data |= 0x00C00000;
1920         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1921
1922         /*
1923          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1924          * programmed in gfx_v9_0_init_always_on_cu_mask()
1925          */
1926
1927         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1928          * but is used here as part of the RLC_LB_CNTL configuration */
1929         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1930         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1931         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1932         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1933         mutex_unlock(&adev->grbm_idx_mutex);
1934
1935         gfx_v9_0_init_always_on_cu_mask(adev);
1936 }
1937
1938 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1939 {
1940         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1941 }
1942
1943 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1944 {
1945         if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1946                 return 5;
1947         else
1948                 return 4;
1949 }
1950
1951 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1952 {
1953         const struct cs_section_def *cs_data;
1954         int r;
1955
1956         adev->gfx.rlc.cs_data = gfx9_cs_data;
1957
1958         cs_data = adev->gfx.rlc.cs_data;
1959
1960         if (cs_data) {
1961                 /* init clear state block */
1962                 r = amdgpu_gfx_rlc_init_csb(adev);
1963                 if (r)
1964                         return r;
1965         }
1966
1967         if (adev->flags & AMD_IS_APU) {
1968                 /* TODO: double check the cp_table_size for RV */
1969                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1970                 r = amdgpu_gfx_rlc_init_cpt(adev);
1971                 if (r)
1972                         return r;
1973         }
1974
1975         switch (adev->ip_versions[GC_HWIP][0]) {
1976         case IP_VERSION(9, 2, 2):
1977         case IP_VERSION(9, 1, 0):
1978                 gfx_v9_0_init_lbpw(adev);
1979                 break;
1980         case IP_VERSION(9, 4, 0):
1981                 gfx_v9_4_init_lbpw(adev);
1982                 break;
1983         default:
1984                 break;
1985         }
1986
1987         /* init spm vmid with 0xf */
1988         if (adev->gfx.rlc.funcs->update_spm_vmid)
1989                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1990
1991         return 0;
1992 }
1993
1994 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1995 {
1996         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1997         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1998 }
1999
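/*
 * Allocate the MEC resources: an HPD EOP buffer in VRAM for the
 * acquired compute queues and a GTT buffer object holding a copy of the
 * MEC firmware.
 */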
2000 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
2001 {
2002         int r;
2003         u32 *hpd;
2004         const __le32 *fw_data;
2005         unsigned fw_size;
2006         u32 *fw;
2007         size_t mec_hpd_size;
2008
2009         const struct gfx_firmware_header_v1_0 *mec_hdr;
2010
2011         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2012
2013         /* take ownership of the relevant compute queues */
2014         amdgpu_gfx_compute_queue_acquire(adev);
2015         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
2016         if (mec_hpd_size) {
2017                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
2018                                               AMDGPU_GEM_DOMAIN_VRAM,
2019                                               &adev->gfx.mec.hpd_eop_obj,
2020                                               &adev->gfx.mec.hpd_eop_gpu_addr,
2021                                               (void **)&hpd);
2022                 if (r) {
2023                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
2024                         gfx_v9_0_mec_fini(adev);
2025                         return r;
2026                 }
2027
2028                 memset(hpd, 0, mec_hpd_size);
2029
2030                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
2031                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
2032         }
2033
2034         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2035
2036         fw_data = (const __le32 *)
2037                 (adev->gfx.mec_fw->data +
2038                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2039         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
2040
2041         r = amdgpu_bo_create_reserved(adev, fw_size,
2042                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2043                                       &adev->gfx.mec.mec_fw_obj,
2044                                       &adev->gfx.mec.mec_fw_gpu_addr,
2045                                       (void **)&fw);
2046         if (r) {
2047                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
2048                 gfx_v9_0_mec_fini(adev);
2049                 return r;
2050         }
2051
2052         memcpy(fw, fw_data, fw_size);
2053
2054         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2055         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
2056
2057         return 0;
2058 }
2059
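/*
 * Indexed SQ register access for wave debugging: select the wave/SIMD
 * and register index through SQ_IND_INDEX, then read the value(s) back
 * from SQ_IND_DATA.
 */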
2060 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
2061 {
2062         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2063                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2064                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2065                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
2066                 (SQ_IND_INDEX__FORCE_READ_MASK));
2067         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2068 }
2069
2070 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2071                            uint32_t wave, uint32_t thread,
2072                            uint32_t regno, uint32_t num, uint32_t *out)
2073 {
2074         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2075                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2076                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2077                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
2078                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2079                 (SQ_IND_INDEX__FORCE_READ_MASK) |
2080                 (SQ_IND_INDEX__AUTO_INCR_MASK));
2081         while (num--)
2082                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2083 }
2084
2085 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2086 {
2087         /* type 1 wave data */
2088         dst[(*no_fields)++] = 1;
2089         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2090         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2091         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2092         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2093         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2094         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2095         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2096         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2097         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2098         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2099         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2100         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2101         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2102         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2103         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
2104 }
2105
2106 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2107                                      uint32_t wave, uint32_t start,
2108                                      uint32_t size, uint32_t *dst)
2109 {
2110         wave_read_regs(
2111                 adev, simd, wave, 0,
2112                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2113 }
2114
2115 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2116                                      uint32_t wave, uint32_t thread,
2117                                      uint32_t start, uint32_t size,
2118                                      uint32_t *dst)
2119 {
2120         wave_read_regs(
2121                 adev, simd, wave, thread,
2122                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2123 }
2124
2125 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2126                                   u32 me, u32 pipe, u32 q, u32 vm)
2127 {
2128         soc15_grbm_select(adev, me, pipe, q, vm);
2129 }
2130
2131 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2132         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2133         .select_se_sh = &gfx_v9_0_select_se_sh,
2134         .read_wave_data = &gfx_v9_0_read_wave_data,
2135         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2136         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2137         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2138 };
2139
2140 static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = {
2141         .ras_late_init = amdgpu_gfx_ras_late_init,
2142         .ras_fini = amdgpu_gfx_ras_fini,
2143         .ras_error_inject = &gfx_v9_0_ras_error_inject,
2144         .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2145         .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2146 };
2147
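/*
 * Early gfx configuration: pick per-ASIC FIFO sizes, RAS callbacks and
 * the GB_ADDR_CONFIG value, then decode its fields (pipes, banks,
 * shader engines, ...) into adev->gfx.config.
 */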
2148 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2149 {
2150         u32 gb_addr_config;
2151         int err;
2152
2153         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2154
2155         switch (adev->ip_versions[GC_HWIP][0]) {
2156         case IP_VERSION(9, 0, 1):
2157                 adev->gfx.config.max_hw_contexts = 8;
2158                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2159                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2160                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2161                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2162                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2163                 break;
2164         case IP_VERSION(9, 2, 1):
2165                 adev->gfx.config.max_hw_contexts = 8;
2166                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2167                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2168                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2169                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2170                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2171                 DRM_INFO("fix gfx.config for vega12\n");
2172                 break;
2173         case IP_VERSION(9, 4, 0):
2174                 adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs;
2175                 adev->gfx.config.max_hw_contexts = 8;
2176                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2177                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2178                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2179                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2180                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2181                 gb_addr_config &= ~0xf3e777ff;
2182                 gb_addr_config |= 0x22014042;
2183                 /* check vbios table if gpu info is not available */
2184                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2185                 if (err)
2186                         return err;
2187                 break;
2188         case IP_VERSION(9, 2, 2):
2189         case IP_VERSION(9, 1, 0):
2190                 adev->gfx.config.max_hw_contexts = 8;
2191                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2192                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2193                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2194                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2195                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2196                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2197                 else
2198                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2199                 break;
2200         case IP_VERSION(9, 4, 1):
2201                 adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs;
2202                 adev->gfx.config.max_hw_contexts = 8;
2203                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2204                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2205                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2206                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2207                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2208                 gb_addr_config &= ~0xf3e777ff;
2209                 gb_addr_config |= 0x22014042;
2210                 break;
2211         case IP_VERSION(9, 3, 0):
2212                 adev->gfx.config.max_hw_contexts = 8;
2213                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2214                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2215                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2216                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2217                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2218                 gb_addr_config &= ~0xf3e777ff;
2219                 gb_addr_config |= 0x22010042;
2220                 break;
2221         case IP_VERSION(9, 4, 2):
2222                 adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs;
2223                 adev->gfx.config.max_hw_contexts = 8;
2224                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2225                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2226                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2227                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2228                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2229                 gb_addr_config &= ~0xf3e777ff;
2230                 gb_addr_config |= 0x22014042;
2231                 /* check vbios table if gpu info is not available */
2232                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2233                 if (err)
2234                         return err;
2235                 break;
2236         default:
2237                 BUG();
2238                 break;
2239         }
2240
2241         adev->gfx.config.gb_addr_config = gb_addr_config;
2242
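        /*
         * GB_ADDR_CONFIG stores the bus topology as log2-encoded fields, so
         * each field below is expanded with 1 << REG_GET_FIELD(...).  As an
         * illustration (field values hypothetical): a NUM_PIPES field of 2
         * decodes to 1 << 2 = 4 pipes, and a PIPE_INTERLEAVE_SIZE field of 1
         * decodes to 1 << (8 + 1) = 512 bytes.
         */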
2243         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2244                         REG_GET_FIELD(
2245                                         adev->gfx.config.gb_addr_config,
2246                                         GB_ADDR_CONFIG,
2247                                         NUM_PIPES);
2248
2249         adev->gfx.config.max_tile_pipes =
2250                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2251
2252         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2253                         REG_GET_FIELD(
2254                                         adev->gfx.config.gb_addr_config,
2255                                         GB_ADDR_CONFIG,
2256                                         NUM_BANKS);
2257         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2258                         REG_GET_FIELD(
2259                                         adev->gfx.config.gb_addr_config,
2260                                         GB_ADDR_CONFIG,
2261                                         MAX_COMPRESSED_FRAGS);
2262         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2263                         REG_GET_FIELD(
2264                                         adev->gfx.config.gb_addr_config,
2265                                         GB_ADDR_CONFIG,
2266                                         NUM_RB_PER_SE);
2267         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2268                         REG_GET_FIELD(
2269                                         adev->gfx.config.gb_addr_config,
2270                                         GB_ADDR_CONFIG,
2271                                         NUM_SHADER_ENGINES);
2272         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2273                         REG_GET_FIELD(
2274                                         adev->gfx.config.gb_addr_config,
2275                                         GB_ADDR_CONFIG,
2276                                         PIPE_INTERLEAVE_SIZE));
2277
2278         return 0;
2279 }
2280
2281 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2282                                       int mec, int pipe, int queue)
2283 {
2284         unsigned irq_type;
2285         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2286         unsigned int hw_prio;
2287 
2290         /* mec0 is me1 */
2291         ring->me = mec + 1;
2292         ring->pipe = pipe;
2293         ring->queue = queue;
2294
2295         ring->ring_obj = NULL;
2296         ring->use_doorbell = true;
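        /*
         * Doorbell assignments in adev->doorbell_index are handed out in
         * 64-bit units on SOC15, so the index is shifted left by one here to
         * get the dword-granular offset; the gfx ring setup below applies
         * the same shift to gfx_ring0.
         */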
2297         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2298         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2299                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2300         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2301
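        /*
         * Each MEC pipe has its own EOP interrupt source, laid out with
         * MEC1's pipes first, then MEC2's.  For example, mec 0 (ME1) pipe 2
         * resolves to AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + 2.
         */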
2302         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2303                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2304                 + ring->pipe;
2305         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2306                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2307         /* type-2 packets are deprecated on MEC, use type-3 instead */
2308         return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2309                                 hw_prio, NULL);
2310 }
2311
2312 static int gfx_v9_0_sw_init(void *handle)
2313 {
2314         int i, j, k, r, ring_id;
2315         struct amdgpu_ring *ring;
2316         struct amdgpu_kiq *kiq;
2317         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2318
2319         switch (adev->ip_versions[GC_HWIP][0]) {
2320         case IP_VERSION(9, 0, 1):
2321         case IP_VERSION(9, 2, 1):
2322         case IP_VERSION(9, 4, 0):
2323         case IP_VERSION(9, 2, 2):
2324         case IP_VERSION(9, 1, 0):
2325         case IP_VERSION(9, 4, 1):
2326         case IP_VERSION(9, 3, 0):
2327         case IP_VERSION(9, 4, 2):
2328                 adev->gfx.mec.num_mec = 2;
2329                 break;
2330         default:
2331                 adev->gfx.mec.num_mec = 1;
2332                 break;
2333         }
2334
2335         adev->gfx.mec.num_pipe_per_mec = 4;
2336         adev->gfx.mec.num_queue_per_pipe = 8;
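        /*
         * Two MECs with four pipes each and eight queues per pipe give up
         * to 2 * 4 * 8 = 64 hardware compute queues;
         * amdgpu_gfx_is_mec_queue_enabled() in the loop further down picks
         * which of them are actually exposed as rings.
         */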
2337
2338         /* EOP Event */
2339         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2340         if (r)
2341                 return r;
2342
2343         /* Privileged reg */
2344         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2345                               &adev->gfx.priv_reg_irq);
2346         if (r)
2347                 return r;
2348
2349         /* Privileged inst */
2350         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2351                               &adev->gfx.priv_inst_irq);
2352         if (r)
2353                 return r;
2354
2355         /* ECC error */
2356         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2357                               &adev->gfx.cp_ecc_error_irq);
2358         if (r)
2359                 return r;
2360
2361         /* FUE error */
2362         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2363                               &adev->gfx.cp_ecc_error_irq);
2364         if (r)
2365                 return r;
2366
2367         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2368
2369         gfx_v9_0_scratch_init(adev);
2370
2371         r = gfx_v9_0_init_microcode(adev);
2372         if (r) {
2373                 DRM_ERROR("Failed to load gfx firmware!\n");
2374                 return r;
2375         }
2376
2377         r = adev->gfx.rlc.funcs->init(adev);
2378         if (r) {
2379                 DRM_ERROR("Failed to init rlc BOs!\n");
2380                 return r;
2381         }
2382
2383         r = gfx_v9_0_mec_init(adev);
2384         if (r) {
2385                 DRM_ERROR("Failed to init MEC BOs!\n");
2386                 return r;
2387         }
2388
2389         /* set up the gfx ring */
2390         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2391                 ring = &adev->gfx.gfx_ring[i];
2392                 ring->ring_obj = NULL;
2393                 if (!i)
2394                         sprintf(ring->name, "gfx");
2395                 else
2396                         sprintf(ring->name, "gfx_%d", i);
2397                 ring->use_doorbell = true;
2398                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2399                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2400                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2401                                      AMDGPU_RING_PRIO_DEFAULT, NULL);
2402                 if (r)
2403                         return r;
2404         }
2405
2406         /* set up the compute queues - allocate horizontally across pipes */
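        /*
         * The pipe index (k) is the innermost loop, so consecutive ring ids
         * land on different pipes of the same MEC before a second queue on
         * any one pipe is used; e.g. ring 0 -> mec 0 pipe 0 queue 0 and
         * ring 1 -> mec 0 pipe 1 queue 0, assuming both queues are enabled.
         */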
2407         ring_id = 0;
2408         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2409                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2410                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2411                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2412                                         continue;
2413
2414                                 r = gfx_v9_0_compute_ring_init(adev,
2415                                                                ring_id,
2416                                                                i, k, j);
2417                                 if (r)
2418                                         return r;
2419
2420                                 ring_id++;
2421                         }
2422                 }
2423         }
2424
2425         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2426         if (r) {
2427                 DRM_ERROR("Failed to init KIQ BOs!\n");
2428                 return r;
2429         }
2430
2431         kiq = &adev->gfx.kiq;
2432         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2433         if (r)
2434                 return r;
2435
2436         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2437         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2438         if (r)
2439                 return r;
2440
2441         adev->gfx.ce_ram_size = 0x8000;
2442
2443         r = gfx_v9_0_gpu_early_init(adev);
2444         if (r)
2445                 return r;
2446
2447         return 0;
2448 }
2449 
2451 static int gfx_v9_0_sw_fini(void *handle)
2452 {
2453         int i;
2454         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2455
2456         if (adev->gfx.ras_funcs &&
2457             adev->gfx.ras_funcs->ras_fini)
2458                 adev->gfx.ras_funcs->ras_fini(adev);
2459
2460         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2461                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2462         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2463                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2464
2465         amdgpu_gfx_mqd_sw_fini(adev);
2466         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2467         amdgpu_gfx_kiq_fini(adev);
2468
2469         gfx_v9_0_mec_fini(adev);
2470         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2471                                 &adev->gfx.rlc.clear_state_gpu_addr,
2472                                 (void **)&adev->gfx.rlc.cs_ptr);
2473         if (adev->flags & AMD_IS_APU) {
2474                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2475                                 &adev->gfx.rlc.cp_table_gpu_addr,
2476                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2477         }
2478         gfx_v9_0_free_microcode(adev);
2479
2480         return 0;
2481 }
2482 
2484 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2485 {
2486         /* TODO */
2487 }
2488
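/*
 * Program GRBM_GFX_INDEX so that subsequent register accesses target a
 * single shader engine / shader array / instance, or broadcast to all of
 * them when 0xffffffff is passed.  Callers hold grbm_idx_mutex; the usual
 * pattern (mirroring gfx_v9_0_setup_rb() below) is:
 *
 *      mutex_lock(&adev->grbm_idx_mutex);
 *      gfx_v9_0_select_se_sh(adev, se, sh, 0xffffffff);
 *      ... per-SE/SH register reads and writes ...
 *      gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 *      mutex_unlock(&adev->grbm_idx_mutex);
 */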
2489 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2490                            u32 instance)
2491 {
2492         u32 data;
2493
2494         if (instance == 0xffffffff)
2495                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2496         else
2497                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2498
2499         if (se_num == 0xffffffff)
2500                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2501         else
2502                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2503
2504         if (sh_num == 0xffffffff)
2505                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2506         else
2507                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2508
2509         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2510 }
2511
2512 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2513 {
2514         u32 data, mask;
2515
2516         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2517         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2518
2519         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2520         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2521
2522         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2523                                          adev->gfx.config.max_sh_per_se);
2524
2525         return (~data) & mask;
2526 }
2527
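/*
 * Collect the per-SE/SH render backend bitmaps into one packed mask.  As an
 * illustration (numbers hypothetical): with 4 backends per SE and 1 shader
 * array per SE, rb_bitmap_width_per_sh is 4, so SE0's bitmap lands in bits
 * 0-3 of active_rbs and SE1's bitmap in bits 4-7.
 */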
2528 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2529 {
2530         int i, j;
2531         u32 data;
2532         u32 active_rbs = 0;
2533         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2534                                         adev->gfx.config.max_sh_per_se;
2535
2536         mutex_lock(&adev->grbm_idx_mutex);
2537         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2538                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2539                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2540                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2541                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2542                                                rb_bitmap_width_per_sh);
2543                 }
2544         }
2545         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2546         mutex_unlock(&adev->grbm_idx_mutex);
2547
2548         adev->gfx.config.backend_enable_mask = active_rbs;
2549         adev->gfx.config.num_rbs = hweight32(active_rbs);
2550 }
2551
2552 #define DEFAULT_SH_MEM_BASES    (0x6000)
2553 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2554 {
2555         int i;
2556         uint32_t sh_mem_config;
2557         uint32_t sh_mem_bases;
2558
2559         /*
2560          * Configure apertures:
2561          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2562          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2563          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2564          */
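        /*
         * SH_MEM_BASES packs the private (scratch) base into its low 16
         * bits and the shared base into its high 16 bits, each supplying
         * the top 16 bits of a 64-bit address: 0x6000 corresponds to
         * 0x60000000'00000000, matching the aperture layout above.
         */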
2565         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2566
2567         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2568                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2569                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2570
2571         mutex_lock(&adev->srbm_mutex);
2572         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2573                 soc15_grbm_select(adev, 0, 0, 0, i);
2574                 /* CP and shaders */
2575                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2576                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2577         }
2578         soc15_grbm_select(adev, 0, 0, 0, 0);
2579         mutex_unlock(&adev->srbm_mutex);
2580
2581         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2582            access. These should be enabled by FW for target VMIDs. */
2583         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2584                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2585                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2586                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2587                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2588         }
2589 }
2590
2591 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2592 {
2593         int vmid;
2594
2595         /*
2596          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2597          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2598          * the driver can enable them for graphics. VMID0 should maintain
2599          * access so that HWS firmware can save/restore entries.
2600          */
2601         for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2602                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2603                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2604                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2605                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2606         }
2607 }
2608
2609 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2610 {
2611         uint32_t tmp;
2612
2613         switch (adev->ip_versions[GC_HWIP][0]) {
2614         case IP_VERSION(9, 4, 1):
2615                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2616                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2617                                         DISABLE_BARRIER_WAITCNT, 1);
2618                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2619                 break;
2620         default:
2621                 break;
2622         }
2623 }
2624
2625 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2626 {
2627         u32 tmp;
2628         int i;
2629
2630         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2631
2632         gfx_v9_0_tiling_mode_table_init(adev);
2633
2634         gfx_v9_0_setup_rb(adev);
2635         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2636         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2637
2638         /* XXX SH_MEM regs */
2639         /* where to put LDS, scratch, GPUVM in FSA64 space */
2640         mutex_lock(&adev->srbm_mutex);
2641         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2642                 soc15_grbm_select(adev, 0, 0, 0, i);
2643                 /* CP and shaders */
2644                 if (i == 0) {
2645                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2646                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2647                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2648                                             !!adev->gmc.noretry);
2649                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2650                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2651                 } else {
2652                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2653                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2654                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2655                                             !!adev->gmc.noretry);
2656                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2657                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2658                                 (adev->gmc.private_aperture_start >> 48));
2659                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2660                                 (adev->gmc.shared_aperture_start >> 48));
2661                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2662                 }
2663         }
2664         soc15_grbm_select(adev, 0, 0, 0, 0);
2665
2666         mutex_unlock(&adev->srbm_mutex);
2667
2668         gfx_v9_0_init_compute_vmid(adev);
2669         gfx_v9_0_init_gds_vmid(adev);
2670         gfx_v9_0_init_sq_config(adev);
2671 }
2672
2673 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2674 {
2675         u32 i, j, k;
2676         u32 mask;
2677
2678         mutex_lock(&adev->grbm_idx_mutex);
2679         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2680                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2681                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2682                         for (k = 0; k < adev->usec_timeout; k++) {
2683                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2684                                         break;
2685                                 udelay(1);
2686                         }
2687                         if (k == adev->usec_timeout) {
2688                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2689                                                       0xffffffff, 0xffffffff);
2690                                 mutex_unlock(&adev->grbm_idx_mutex);
2691                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2692                                          i, j);
2693                                 return;
2694                         }
2695                 }
2696         }
2697         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2698         mutex_unlock(&adev->grbm_idx_mutex);
2699
2700         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2701                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2702                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2703                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2704         for (k = 0; k < adev->usec_timeout; k++) {
2705                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2706                         break;
2707                 udelay(1);
2708         }
2709 }
2710
2711 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2712                                                bool enable)
2713 {
2714         u32 tmp;
2715
2716         /* These interrupts should be enabled to drive DS clock */
2717
2718         tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2719
2720         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2721         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2722         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2723         if (adev->gfx.num_gfx_rings)
2724                 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2725
2726         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2727 }
2728
2729 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2730 {
2731         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2732         /* csib */
2733         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2734                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2735         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2736                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2737         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2738                         adev->gfx.rlc.clear_state_size);
2739 }
2740
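/*
 * Walk the RLC register_list_format blob.  As the loop below assumes, the
 * indirect portion is a series of blocks, each a run of three-dword entries
 * whose third dword is the indirect register offset, terminated by a
 * 0xFFFFFFFF sentinel.  The routine records where each block starts and
 * collects the set of unique indirect registers referenced.
 */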
2741 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2742                                 int indirect_offset,
2743                                 int list_size,
2744                                 int *unique_indirect_regs,
2745                                 int unique_indirect_reg_count,
2746                                 int *indirect_start_offsets,
2747                                 int *indirect_start_offsets_count,
2748                                 int max_start_offsets_count)
2749 {
2750         int idx;
2751
2752         for (; indirect_offset < list_size; indirect_offset++) {
2753                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2754                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2755                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2756
2757                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2758                         indirect_offset += 2;
2759
2760                         /* look for the matching index */
2761                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2762                                 if (unique_indirect_regs[idx] ==
2763                                         register_list_format[indirect_offset] ||
2764                                         !unique_indirect_regs[idx])
2765                                         break;
2766                         }
2767
2768                         BUG_ON(idx >= unique_indirect_reg_count);
2769
2770                         if (!unique_indirect_regs[idx])
2771                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2772
2773                         indirect_offset++;
2774                 }
2775         }
2776 }
2777
2778 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2779 {
2780         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2781         int unique_indirect_reg_count = 0;
2782
2783         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2784         int indirect_start_offsets_count = 0;
2785
2786         int list_size = 0;
2787         int i = 0, j = 0;
2788         u32 tmp = 0;
2789
2790         u32 *register_list_format =
2791                 kmemdup(adev->gfx.rlc.register_list_format,
2792                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2793         if (!register_list_format)
2794                 return -ENOMEM;
2795
2796         /* setup unique_indirect_regs array and indirect_start_offsets array */
2797         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2798         gfx_v9_1_parse_ind_reg_list(register_list_format,
2799                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2800                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2801                                     unique_indirect_regs,
2802                                     unique_indirect_reg_count,
2803                                     indirect_start_offsets,
2804                                     &indirect_start_offsets_count,
2805                                     ARRAY_SIZE(indirect_start_offsets));
2806
2807         /* enable auto inc in case it is disabled */
2808         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2809         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2810         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2811
2812         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2813         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2814                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2815         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2816                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2817                         adev->gfx.rlc.register_restore[i]);
2818
2819         /* load indirect register */
2820         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2821                 adev->gfx.rlc.reg_list_format_start);
2822
2823         /* direct register portion */
2824         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2825                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2826                         register_list_format[i]);
2827
2828         /* indirect register portion */
2829         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2830                 if (register_list_format[i] == 0xFFFFFFFF) {
2831                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2832                         continue;
2833                 }
2834
2835                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2836                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2837
2838                 for (j = 0; j < unique_indirect_reg_count; j++) {
2839                         if (register_list_format[i] == unique_indirect_regs[j]) {
2840                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2841                                 break;
2842                         }
2843                 }
2844
2845                 BUG_ON(j >= unique_indirect_reg_count);
2846
2847                 i++;
2848         }
2849
2850         /* set save/restore list size */
2851         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2852         list_size = list_size >> 1;
2853         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2854                 adev->gfx.rlc.reg_restore_list_size);
2855         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2856
2857         /* write the starting offsets to RLC scratch ram */
2858         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2859                 adev->gfx.rlc.starting_offsets_start);
2860         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2861                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2862                        indirect_start_offsets[i]);
2863
2864         /* load unique indirect regs */
2865         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2866                 if (unique_indirect_regs[i] != 0) {
2867                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2868                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2869                                unique_indirect_regs[i] & 0x3FFFF);
2870
2871                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2872                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2873                                unique_indirect_regs[i] >> 20);
2874                 }
2875         }
2876
2877         kfree(register_list_format);
2878         return 0;
2879 }
2880
2881 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2882 {
2883         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2884 }
2885
2886 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2887                                              bool enable)
2888 {
2889         uint32_t data = 0;
2890         uint32_t default_data = 0;
2891
2892         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2893         if (enable) {
2894                 /* enable GFXIP control over CGPG */
2895                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2896                 if (default_data != data)
2897                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2898
2899                 /* update status */
2900                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2901                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2902                 if (default_data != data)
2903                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2904         } else {
2905                 /* restore GFXIP control over CGPG */
2906                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2907                 if (default_data != data)
2908                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2909         }
2910 }
2911
2912 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2913 {
2914         uint32_t data = 0;
2915
2916         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2917                               AMD_PG_SUPPORT_GFX_SMG |
2918                               AMD_PG_SUPPORT_GFX_DMG)) {
2919                 /* init IDLE_POLL_COUNT = 0x60 */
2920                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2921                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2922                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2923                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2924
2925                 /* init RLC PG Delay */
2926                 data = 0;
2927                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2928                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2929                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2930                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2931                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2932
2933                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2934                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2935                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2936                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2937
2938                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2939                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2940                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2941                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2942
2943                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2944                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2945
2946                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2947                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2948                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2949                 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2950                         pwr_10_0_gfxip_control_over_cgpg(adev, true);
2951         }
2952 }
2953
2954 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2955                                                 bool enable)
2956 {
2957         uint32_t data = 0;
2958         uint32_t default_data = 0;
2959
2960         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2961         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2962                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2963                              enable ? 1 : 0);
2964         if (default_data != data)
2965                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2966 }
2967
2968 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2969                                                 bool enable)
2970 {
2971         uint32_t data = 0;
2972         uint32_t default_data = 0;
2973
2974         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2975         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2976                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2977                              enable ? 1 : 0);
2978         if (default_data != data)
2979                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2980 }
2981
2982 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2983                                         bool enable)
2984 {
2985         uint32_t data = 0;
2986         uint32_t default_data = 0;
2987
2988         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2989         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2990                              CP_PG_DISABLE,
2991                              enable ? 0 : 1);
2992         if (default_data != data)
2993                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2994 }
2995
2996 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2997                                                 bool enable)
2998 {
2999         uint32_t data, default_data;
3000
3001         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3002         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3003                              GFX_POWER_GATING_ENABLE,
3004                              enable ? 1 : 0);
3005         if (default_data != data)
3006                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3007 }
3008
3009 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
3010                                                 bool enable)
3011 {
3012         uint32_t data, default_data;
3013
3014         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3015         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3016                              GFX_PIPELINE_PG_ENABLE,
3017                              enable ? 1 : 0);
3018         if (default_data != data)
3019                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3020
3021         if (!enable)
3022                 /* read any GFX register to wake up GFX */
3023                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3024 }
3025
3026 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3027                                                        bool enable)
3028 {
3029         uint32_t data, default_data;
3030
3031         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3032         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3033                              STATIC_PER_CU_PG_ENABLE,
3034                              enable ? 1 : 0);
3035         if (default_data != data)
3036                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3037 }
3038
3039 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3040                                                 bool enable)
3041 {
3042         uint32_t data, default_data;
3043
3044         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3045         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3046                              DYN_PER_CU_PG_ENABLE,
3047                              enable ? 1 : 0);
3048         if (default_data != data)
3049                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3050 }
3051
3052 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3053 {
3054         gfx_v9_0_init_csb(adev);
3055
3056         /*
3057          * The RLC save/restore list is usable since RLC v2_1,
3058          * and it is required by the gfxoff feature.
3059          */
3060         if (adev->gfx.rlc.is_rlc_v2_1) {
3061                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
3062                     (adev->apu_flags & AMD_APU_IS_RAVEN2))
3063                         gfx_v9_1_init_rlc_save_restore_list(adev);
3064                 gfx_v9_0_enable_save_restore_machine(adev);
3065         }
3066
3067         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3068                               AMD_PG_SUPPORT_GFX_SMG |
3069                               AMD_PG_SUPPORT_GFX_DMG |
3070                               AMD_PG_SUPPORT_CP |
3071                               AMD_PG_SUPPORT_GDS |
3072                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3073                 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3074                              adev->gfx.rlc.cp_table_gpu_addr >> 8);
3075                 gfx_v9_0_init_gfx_power_gating(adev);
3076         }
3077 }
3078
3079 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3080 {
3081         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3082         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3083         gfx_v9_0_wait_for_rlc_serdes(adev);
3084 }
3085
3086 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3087 {
3088         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3089         udelay(50);
3090         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3091         udelay(50);
3092 }
3093
3094 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3095 {
3096 #ifdef AMDGPU_RLC_DEBUG_RETRY
3097         u32 rlc_ucode_ver;
3098 #endif
3099
3100         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3101         udelay(50);
3102
3103         /* APUs (e.g. Carrizo) enable the CP interrupt only after the CP is initialized */
3104         if (!(adev->flags & AMD_IS_APU)) {
3105                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3106                 udelay(50);
3107         }
3108
3109 #ifdef AMDGPU_RLC_DEBUG_RETRY
3110         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3111         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3112         if (rlc_ucode_ver == 0x108) {
3113                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3114                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3115                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3116                  * default is 0x9C4 to create a 100us interval */
3117                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3118                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3119                  * to disable the page fault retry interrupts, default is
3120                  * 0x100 (256) */
3121                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3122         }
3123 #endif
3124 }
3125
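/*
 * Legacy (non-PSP) RLC microcode load: point RLC_GPM_UCODE_ADDR at the load
 * start address, stream the ucode dwords through RLC_GPM_UCODE_DATA (the
 * address register advances automatically, so it is programmed only once),
 * then write the firmware version to the address register, as the other
 * legacy ucode loads in this file also do.
 */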
3126 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3127 {
3128         const struct rlc_firmware_header_v2_0 *hdr;
3129         const __le32 *fw_data;
3130         unsigned i, fw_size;
3131
3132         if (!adev->gfx.rlc_fw)
3133                 return -EINVAL;
3134
3135         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3136         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3137
3138         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3139                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3140         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3141
3142         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3143                         RLCG_UCODE_LOADING_START_ADDRESS);
3144         for (i = 0; i < fw_size; i++)
3145                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3146         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3147
3148         return 0;
3149 }
3150
3151 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3152 {
3153         int r;
3154
3155         if (amdgpu_sriov_vf(adev)) {
3156                 gfx_v9_0_init_csb(adev);
3157                 return 0;
3158         }
3159
3160         adev->gfx.rlc.funcs->stop(adev);
3161
3162         /* disable CG */
3163         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3164
3165         gfx_v9_0_init_pg(adev);
3166
3167         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3168                 /* legacy rlc firmware loading */
3169                 r = gfx_v9_0_rlc_load_microcode(adev);
3170                 if (r)
3171                         return r;
3172         }
3173
3174         switch (adev->ip_versions[GC_HWIP][0]) {
3175         case IP_VERSION(9, 2, 2):
3176         case IP_VERSION(9, 1, 0):
3177                 if (amdgpu_lbpw == 0)
3178                         gfx_v9_0_enable_lbpw(adev, false);
3179                 else
3180                         gfx_v9_0_enable_lbpw(adev, true);
3181                 break;
3182         case IP_VERSION(9, 4, 0):
3183                 if (amdgpu_lbpw > 0)
3184                         gfx_v9_0_enable_lbpw(adev, true);
3185                 else
3186                         gfx_v9_0_enable_lbpw(adev, false);
3187                 break;
3188         default:
3189                 break;
3190         }
3191
3192         adev->gfx.rlc.funcs->start(adev);
3193
3194         return 0;
3195 }
3196
3197 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3198 {
3199         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3200
3201         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3202         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3203         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3204         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3205         udelay(50);
3206 }
3207
3208 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3209 {
3210         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3211         const struct gfx_firmware_header_v1_0 *ce_hdr;
3212         const struct gfx_firmware_header_v1_0 *me_hdr;
3213         const __le32 *fw_data;
3214         unsigned i, fw_size;
3215
3216         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3217                 return -EINVAL;
3218
3219         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3220                 adev->gfx.pfp_fw->data;
3221         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3222                 adev->gfx.ce_fw->data;
3223         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3224                 adev->gfx.me_fw->data;
3225
3226         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3227         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3228         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3229
3230         gfx_v9_0_cp_gfx_enable(adev, false);
3231
3232         /* PFP */
3233         fw_data = (const __le32 *)
3234                 (adev->gfx.pfp_fw->data +
3235                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3236         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3237         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3238         for (i = 0; i < fw_size; i++)
3239                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3240         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3241
3242         /* CE */
3243         fw_data = (const __le32 *)
3244                 (adev->gfx.ce_fw->data +
3245                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3246         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3247         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3248         for (i = 0; i < fw_size; i++)
3249                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3250         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3251
3252         /* ME */
3253         fw_data = (const __le32 *)
3254                 (adev->gfx.me_fw->data +
3255                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3256         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3257         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3258         for (i = 0; i < fw_size; i++)
3259                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3260         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3261
3262         return 0;
3263 }
3264
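/*
 * Bring up the gfx CP and prime ring 0 with the initial state sequence:
 * PREAMBLE begin, a CONTEXT_CONTROL packet, the golden SECT_CONTEXT
 * register values from gfx9_cs_data, PREAMBLE end, CLEAR_STATE, then the
 * CE partition bases and the VGT index type.
 */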
3265 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3266 {
3267         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3268         const struct cs_section_def *sect = NULL;
3269         const struct cs_extent_def *ext = NULL;
3270         int r, i, tmp;
3271
3272         /* init the CP */
3273         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3274         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3275
3276         gfx_v9_0_cp_gfx_enable(adev, true);
3277
3278         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3279         if (r) {
3280                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3281                 return r;
3282         }
3283
3284         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3285         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3286
3287         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3288         amdgpu_ring_write(ring, 0x80000000);
3289         amdgpu_ring_write(ring, 0x80000000);
3290
3291         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3292                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3293                         if (sect->id == SECT_CONTEXT) {
3294                                 amdgpu_ring_write(ring,
3295                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3296                                                ext->reg_count));
3297                                 amdgpu_ring_write(ring,
3298                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3299                                 for (i = 0; i < ext->reg_count; i++)
3300                                         amdgpu_ring_write(ring, ext->extent[i]);
3301                         }
3302                 }
3303         }
3304
3305         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3306         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3307
3308         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3309         amdgpu_ring_write(ring, 0);
3310
3311         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3312         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3313         amdgpu_ring_write(ring, 0x8000);
3314         amdgpu_ring_write(ring, 0x8000);
3315
3316         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3317         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3318                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3319         amdgpu_ring_write(ring, tmp);
3320         amdgpu_ring_write(ring, 0);
3321
3322         amdgpu_ring_commit(ring);
3323
3324         return 0;
3325 }
3326
3327 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3328 {
3329         struct amdgpu_ring *ring;
3330         u32 tmp;
3331         u32 rb_bufsz;
3332         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3333
3334         /* Set the write pointer delay */
3335         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3336
3337         /* set the RB to use vmid 0 */
3338         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3339
3340         /* Set ring buffer size */
3341         ring = &adev->gfx.gfx_ring[0];
3342         rb_bufsz = order_base_2(ring->ring_size / 8);
3343         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3344         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3345 #ifdef __BIG_ENDIAN
3346         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3347 #endif
3348         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3349
3350         /* Initialize the ring buffer's write pointers */
3351         ring->wptr = 0;
3352         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3353         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3354
3355         /* set the wb address whether it's enabled or not */
3356         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3357         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3358         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3359
3360         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3361         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3362         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3363
3364         mdelay(1);
3365         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3366
3367         rb_addr = ring->gpu_addr >> 8;
3368         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3369         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3370
3371         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3372         if (ring->use_doorbell) {
3373                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3374                                     DOORBELL_OFFSET, ring->doorbell_index);
3375                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3376                                     DOORBELL_EN, 1);
3377         } else {
3378                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3379         }
3380         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3381
3382         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3383                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3384         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3385
3386         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3387                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3388 
3390         /* start the ring */
3391         gfx_v9_0_cp_gfx_start(adev);
3392         ring->sched.ready = true;
3393
3394         return 0;
3395 }
3396
3397 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3398 {
3399         if (enable) {
3400                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3401         } else {
3402                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3403                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3404                 adev->gfx.kiq.ring.sched.ready = false;
3405         }
3406         udelay(50);
3407 }
3408
3409 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3410 {
3411         const struct gfx_firmware_header_v1_0 *mec_hdr;
3412         const __le32 *fw_data;
3413         unsigned i;
3414         u32 tmp;
3415
3416         if (!adev->gfx.mec_fw)
3417                 return -EINVAL;
3418
3419         gfx_v9_0_cp_compute_enable(adev, false);
3420
3421         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3422         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3423
3424         fw_data = (const __le32 *)
3425                 (adev->gfx.mec_fw->data +
3426                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3427         tmp = 0;
3428         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3429         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3430         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3431
3432         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3433                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3434         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3435                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3436
3437         /* MEC1 */
3438         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3439                          mec_hdr->jt_offset);
3440         for (i = 0; i < mec_hdr->jt_size; i++)
3441                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3442                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3443
3444         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3445                         adev->gfx.mec_fw_version);
3446         /* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3447
3448         return 0;
3449 }
3450
3451 /* KIQ functions */
3452 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3453 {
3454         uint32_t tmp;
3455         struct amdgpu_device *adev = ring->adev;
3456
3457         /* tell RLC which queue is the KIQ */
3458         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3459         tmp &= 0xffffff00;
3460         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3461         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3462         tmp |= 0x80;
3463         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3464 }
3465
3466 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3467 {
3468         struct amdgpu_device *adev = ring->adev;
3469
3470         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3471                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3472                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3473                         mqd->cp_hqd_queue_priority =
3474                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3475                 }
3476         }
3477 }
3478
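/*
 * Populate the memory queue descriptor (MQD) for this ring.  The MQD is the
 * in-memory image of the hardware queue descriptor (HQD) registers; once
 * written, the CP (or the KIQ) can load it into a hardware queue slot, so
 * every field initialized below mirrors a CP_HQD or CP_MQD register.
 */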
3479 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3480 {
3481         struct amdgpu_device *adev = ring->adev;
3482         struct v9_mqd *mqd = ring->mqd_ptr;
3483         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3484         uint32_t tmp;
3485
3486         mqd->header = 0xC0310800;
3487         mqd->compute_pipelinestat_enable = 0x00000001;
3488         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3489         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3490         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3491         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3492         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3493         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3494         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3495         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3496         mqd->compute_misc_reserved = 0x00000003;
3497
3498         mqd->dynamic_cu_mask_addr_lo =
3499                 lower_32_bits(ring->mqd_gpu_addr
3500                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3501         mqd->dynamic_cu_mask_addr_hi =
3502                 upper_32_bits(ring->mqd_gpu_addr
3503                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3504
3505         eop_base_addr = ring->eop_gpu_addr >> 8;
3506         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3507         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3508
3509         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3510         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3511         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3512                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3513
3514         mqd->cp_hqd_eop_control = tmp;
3515
3516         /* enable doorbell? */
3517         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3518
3519         if (ring->use_doorbell) {
3520                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3521                                     DOORBELL_OFFSET, ring->doorbell_index);
3522                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3523                                     DOORBELL_EN, 1);
3524                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3525                                     DOORBELL_SOURCE, 0);
3526                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3527                                     DOORBELL_HIT, 0);
3528         } else {
3529                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3530                                          DOORBELL_EN, 0);
3531         }
3532
3533         mqd->cp_hqd_pq_doorbell_control = tmp;
3534
3535         /* disable the queue if it's active */
3536         ring->wptr = 0;
3537         mqd->cp_hqd_dequeue_request = 0;
3538         mqd->cp_hqd_pq_rptr = 0;
3539         mqd->cp_hqd_pq_wptr_lo = 0;
3540         mqd->cp_hqd_pq_wptr_hi = 0;
3541
3542         /* set the pointer to the MQD */
3543         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3544         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3545
3546         /* set MQD vmid to 0 */
3547         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3548         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3549         mqd->cp_mqd_control = tmp;
3550
3551         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3552         hqd_gpu_addr = ring->gpu_addr >> 8;
3553         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3554         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3555
3556         /* set up the HQD, this is similar to CP_RB0_CNTL */
3557         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3558         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3559                             (order_base_2(ring->ring_size / 4) - 1));
3560         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3561                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3562 #ifdef __BIG_ENDIAN
3563         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3564 #endif
3565         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3566         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3567         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3568         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3569         mqd->cp_hqd_pq_control = tmp;
3570
3571         /* set the wb address whether it's enabled or not */
3572         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3573         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3574         mqd->cp_hqd_pq_rptr_report_addr_hi =
3575                 upper_32_bits(wb_gpu_addr) & 0xffff;
3576
3577         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3578         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3579         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3580         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3581
3582         tmp = 0;
3583         /* enable the doorbell if requested */
3584         if (ring->use_doorbell) {
3585                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3586                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3587                                 DOORBELL_OFFSET, ring->doorbell_index);
3588
3589                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3590                                          DOORBELL_EN, 1);
3591                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3592                                          DOORBELL_SOURCE, 0);
3593                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3594                                          DOORBELL_HIT, 0);
3595         }
3596
3597         mqd->cp_hqd_pq_doorbell_control = tmp;
3598
3599         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3600         ring->wptr = 0;
3601         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3602
3603         /* set the vmid for the queue */
3604         mqd->cp_hqd_vmid = 0;
3605
3606         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3607         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3608         mqd->cp_hqd_persistent_state = tmp;
3609
3610         /* set MIN_IB_AVAIL_SIZE */
3611         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3612         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3613         mqd->cp_hqd_ib_control = tmp;
3614
3615         /* set static priority for a queue/ring */
3616         gfx_v9_0_mqd_set_priority(ring, mqd);
3617         mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3618
3619         /* the map_queues packet doesn't need to activate the queue,
3620          * so only the KIQ needs to set this field.
3621          */
3622         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3623                 mqd->cp_hqd_active = 1;
3624
3625         return 0;
3626 }
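
/*
 * A standalone sketch (illustration only) of the size encoding shared by
 * EOP_SIZE and QUEUE_SIZE above: the hardware decodes the field as
 * 2^(field + 1) dwords, so the driver stores log2(size in dwords) - 1.
 * For the 4096-byte EOP buffer: 4096 / 4 = 1024 dwords,
 * order_base_2(1024) = 10, the field is 9, and 2^(9 + 1) = 1024 again.
 */
static inline uint32_t hqd_size_field_example(uint32_t size_bytes)
{
        uint32_t dwords = size_bytes / 4;

        return order_base_2(dwords) - 1;        /* hw reads 1 << (field + 1) */
}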
3627
3628 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3629 {
3630         struct amdgpu_device *adev = ring->adev;
3631         struct v9_mqd *mqd = ring->mqd_ptr;
3632         int j;
3633
3634         /* disable wptr polling */
3635         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3636
3637         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3638                mqd->cp_hqd_eop_base_addr_lo);
3639         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3640                mqd->cp_hqd_eop_base_addr_hi);
3641
3642         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3643         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3644                mqd->cp_hqd_eop_control);
3645
3646         /* enable doorbell? */
3647         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3648                mqd->cp_hqd_pq_doorbell_control);
3649
3650         /* disable the queue if it's active */
3651         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3652                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3653                 for (j = 0; j < adev->usec_timeout; j++) {
3654                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3655                                 break;
3656                         udelay(1);
3657                 }
3658                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3659                        mqd->cp_hqd_dequeue_request);
3660                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3661                        mqd->cp_hqd_pq_rptr);
3662                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3663                        mqd->cp_hqd_pq_wptr_lo);
3664                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3665                        mqd->cp_hqd_pq_wptr_hi);
3666         }
3667
3668         /* set the pointer to the MQD */
3669         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3670                mqd->cp_mqd_base_addr_lo);
3671         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3672                mqd->cp_mqd_base_addr_hi);
3673
3674         /* set MQD vmid to 0 */
3675         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3676                mqd->cp_mqd_control);
3677
3678         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3679         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3680                mqd->cp_hqd_pq_base_lo);
3681         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3682                mqd->cp_hqd_pq_base_hi);
3683
3684         /* set up the HQD, this is similar to CP_RB0_CNTL */
3685         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3686                mqd->cp_hqd_pq_control);
3687
3688         /* set the wb address whether it's enabled or not */
3689         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3690                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3691         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3692                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3693
3694         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3695         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3696                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3697         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3698                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3699
3700         /* enable the doorbell if requested */
3701         if (ring->use_doorbell) {
3702                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3703                                         (adev->doorbell_index.kiq * 2) << 2);
3704                 /* If GC has entered CGPG, ringing a doorbell beyond the first
3705                  * page doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER
3706                  * to work around this issue; the change has to stay in step
3707                  * with the corresponding firmware update.
3708                  */
3709                 if (check_if_enlarge_doorbell_range(adev))
3710                         WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3711                                         (adev->doorbell.size - 4));
3712                 else
3713                         WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3714                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3715         }
3716
3717         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3718                mqd->cp_hqd_pq_doorbell_control);
3719
3720         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3721         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3722                mqd->cp_hqd_pq_wptr_lo);
3723         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3724                mqd->cp_hqd_pq_wptr_hi);
3725
3726         /* set the vmid for the queue */
3727         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3728
3729         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3730                mqd->cp_hqd_persistent_state);
3731
3732         /* activate the queue */
3733         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3734                mqd->cp_hqd_active);
3735
3736         if (ring->use_doorbell)
3737                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3738
3739         return 0;
3740 }
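
/*
 * A standalone sketch (illustration only) of the doorbell-range math
 * above, assuming (as the "* 2" suggests) that the per-block doorbell
 * indices count 64-bit doorbells: "* 2" converts to a 32-bit dword index
 * and "<< 2" converts dwords to the byte offset the range registers take.
 */
static inline uint32_t doorbell_byte_offset_example(uint32_t index64)
{
        return (index64 * 2) << 2;      /* equivalent to index64 * 8 bytes */
}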
3741
3742 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3743 {
3744         struct amdgpu_device *adev = ring->adev;
3745         int j;
3746
3747         /* disable the queue if it's active */
3748         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3749
3750                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3751
3752                 for (j = 0; j < adev->usec_timeout; j++) {
3753                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3754                                 break;
3755                         udelay(1);
3756                 }
3757
3758                 if (j == adev->usec_timeout) {
3759                         DRM_DEBUG("KIQ dequeue request failed.\n");
3760
3761                         /* Manual disable if dequeue request times out */
3762                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3763                 }
3764
3765                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3766                       0);
3767         }
3768
3769         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3770         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3771         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3772         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3773         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3774         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3775         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3776         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3777
3778         return 0;
3779 }
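
/*
 * A standalone sketch (illustration only) of the drain pattern used
 * above: request a dequeue, poll the ACTIVE bit for a bounded time, and
 * fall back to forcing the queue inactive on timeout.  The register
 * callbacks are assumptions standing in for the RREG/WREG accessors.
 */
static int drain_hqd_example(uint32_t (*read_active)(void),
                             void (*write_active)(uint32_t),
                             void (*write_dequeue_req)(uint32_t),
                             unsigned int timeout_us)
{
        unsigned int j;

        write_dequeue_req(1);
        for (j = 0; j < timeout_us; j++) {
                if (!(read_active() & 1)) {
                        write_dequeue_req(0);
                        return 0;               /* queue drained cleanly */
                }
                udelay(1);
        }
        write_active(0);                        /* manual disable on timeout */
        write_dequeue_req(0);
        return -ETIMEDOUT;
}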
3780
3781 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3782 {
3783         struct amdgpu_device *adev = ring->adev;
3784         struct v9_mqd *mqd = ring->mqd_ptr;
3785         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3786         struct v9_mqd *tmp_mqd;
3787
3788         gfx_v9_0_kiq_setting(ring);
3789
3790         /* The GPU could be in a bad state during probe: the driver triggers
3791          * the reset after loading the SMU, and in that case the MQD has not
3792          * been initialized yet, so the driver needs to re-init it.
3793          * Check mqd->cp_hqd_pq_control, since this value should not be 0.
3794          */
3795         tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3796         if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3797                 /* for the GPU_RESET case, reset the MQD to a clean state */
3798                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3799                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3800
3801                 /* reset ring buffer */
3802                 ring->wptr = 0;
3803                 amdgpu_ring_clear_ring(ring);
3804
3805                 mutex_lock(&adev->srbm_mutex);
3806                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3807                 gfx_v9_0_kiq_init_register(ring);
3808                 soc15_grbm_select(adev, 0, 0, 0, 0);
3809                 mutex_unlock(&adev->srbm_mutex);
3810         } else {
3811                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3812                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3813                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3814                 mutex_lock(&adev->srbm_mutex);
3815                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3816                 gfx_v9_0_mqd_init(ring);
3817                 gfx_v9_0_kiq_init_register(ring);
3818                 soc15_grbm_select(adev, 0, 0, 0, 0);
3819                 mutex_unlock(&adev->srbm_mutex);
3820
3821                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3822                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3823         }
3824
3825         return 0;
3826 }
3827
3828 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3829 {
3830         struct amdgpu_device *adev = ring->adev;
3831         struct v9_mqd *mqd = ring->mqd_ptr;
3832         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3833         struct v9_mqd *tmp_mqd;
3834
3835         /* Same as the KIQ init above: the driver needs to re-init the MQD
3836          * if mqd->cp_hqd_pq_control shows it was never initialized.
3837          */
3838         tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3839
3840         if (!tmp_mqd->cp_hqd_pq_control ||
3841             (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3842                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3843                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3844                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3845                 mutex_lock(&adev->srbm_mutex);
3846                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3847                 gfx_v9_0_mqd_init(ring);
3848                 soc15_grbm_select(adev, 0, 0, 0, 0);
3849                 mutex_unlock(&adev->srbm_mutex);
3850
3851                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3852                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3853         } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3854                 /* reset MQD to a clean status */
3855                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3856                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3857
3858                 /* reset ring buffer */
3859                 ring->wptr = 0;
3860                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3861                 amdgpu_ring_clear_ring(ring);
3862         } else {
3863                 amdgpu_ring_clear_ring(ring);
3864         }
3865
3866         return 0;
3867 }
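
/*
 * A standalone sketch (illustration only) of the three-way decision made
 * above, reduced to its inputs: a zero cp_hqd_pq_control in the backup
 * (or a cold start) means a fresh MQD init, a GPU reset restores the
 * backed-up MQD, and a resume from suspend only clears the ring buffer.
 */
enum mqd_action_example {
        MQD_FRESH_INIT,         /* memset + gfx_v9_0_mqd_init + take backup */
        MQD_RESTORE_BACKUP,     /* copy backup back, reset ring pointers */
        MQD_CLEAR_RING_ONLY,    /* resume from suspend: keep MQD as-is */
};

static enum mqd_action_example pick_mqd_action_example(bool pq_control_set,
                                                       bool in_reset,
                                                       bool in_suspend)
{
        if (!pq_control_set || (!in_reset && !in_suspend))
                return MQD_FRESH_INIT;
        if (in_reset)
                return MQD_RESTORE_BACKUP;
        return MQD_CLEAR_RING_ONLY;
}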
3868
3869 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3870 {
3871         struct amdgpu_ring *ring;
3872         int r;
3873
3874         ring = &adev->gfx.kiq.ring;
3875
3876         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3877         if (unlikely(r != 0))
3878                 return r;
3879
3880         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3881         if (unlikely(r != 0))
3882                 return r;
3883
3884         gfx_v9_0_kiq_init_queue(ring);
3885         amdgpu_bo_kunmap(ring->mqd_obj);
3886         ring->mqd_ptr = NULL;
3887         amdgpu_bo_unreserve(ring->mqd_obj);
3888         ring->sched.ready = true;
3889         return 0;
3890 }
3891
3892 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3893 {
3894         struct amdgpu_ring *ring = NULL;
3895         int r = 0, i;
3896
3897         gfx_v9_0_cp_compute_enable(adev, true);
3898
3899         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3900                 ring = &adev->gfx.compute_ring[i];
3901
3902                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3903                 if (unlikely(r != 0))
3904                         goto done;
3905                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3906                 if (!r) {
3907                         r = gfx_v9_0_kcq_init_queue(ring);
3908                         amdgpu_bo_kunmap(ring->mqd_obj);
3909                         ring->mqd_ptr = NULL;
3910                 }
3911                 amdgpu_bo_unreserve(ring->mqd_obj);
3912                 if (r)
3913                         goto done;
3914         }
3915
3916         r = amdgpu_gfx_enable_kcq(adev);
3917 done:
3918         return r;
3919 }
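
/*
 * A standalone sketch (illustration only) of the buffer-object access
 * pattern used per ring above: reserve, kmap, operate, kunmap, and
 * unreserve on every path, with the operation's result checked only
 * after the reservation is dropped.  The callbacks stand in for the
 * amdgpu_bo_* helpers.
 */
static int with_mapped_bo_example(int (*reserve)(void),
                                  int (*kmap)(void **ptr),
                                  void (*kunmap)(void),
                                  void (*unreserve)(void),
                                  int (*op)(void *ptr))
{
        void *ptr;
        int r;

        r = reserve();
        if (r)
                return r;
        r = kmap(&ptr);
        if (!r) {
                r = op(ptr);            /* e.g. gfx_v9_0_kcq_init_queue() */
                kunmap();
        }
        unreserve();                    /* always drop the reservation */
        return r;
}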
3920
3921 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3922 {
3923         int r, i;
3924         struct amdgpu_ring *ring;
3925
3926         if (!(adev->flags & AMD_IS_APU))
3927                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3928
3929         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3930                 if (adev->gfx.num_gfx_rings) {
3931                         /* legacy firmware loading */
3932                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3933                         if (r)
3934                                 return r;
3935                 }
3936
3937                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3938                 if (r)
3939                         return r;
3940         }
3941
3942         r = gfx_v9_0_kiq_resume(adev);
3943         if (r)
3944                 return r;
3945
3946         if (adev->gfx.num_gfx_rings) {
3947                 r = gfx_v9_0_cp_gfx_resume(adev);
3948                 if (r)
3949                         return r;
3950         }
3951
3952         r = gfx_v9_0_kcq_resume(adev);
3953         if (r)
3954                 return r;
3955
3956         if (adev->gfx.num_gfx_rings) {
3957                 ring = &adev->gfx.gfx_ring[0];
3958                 r = amdgpu_ring_test_helper(ring);
3959                 if (r)
3960                         return r;
3961         }
3962
3963         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3964                 ring = &adev->gfx.compute_ring[i];
3965                 amdgpu_ring_test_helper(ring);
3966         }
3967
3968         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3969
3970         return 0;
3971 }
3972
3973 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3974 {
3975         u32 tmp;
3976
3977         if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3978             adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3979                 return;
3980
3981         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3982         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3983                                 adev->df.hash_status.hash_64k);
3984         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3985                                 adev->df.hash_status.hash_2m);
3986         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3987                                 adev->df.hash_status.hash_1g);
3988         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3989 }
3990
3991 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3992 {
3993         if (adev->gfx.num_gfx_rings)
3994                 gfx_v9_0_cp_gfx_enable(adev, enable);
3995         gfx_v9_0_cp_compute_enable(adev, enable);
3996 }
3997
3998 static int gfx_v9_0_hw_init(void *handle)
3999 {
4000         int r;
4001         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4002
4003         if (!amdgpu_sriov_vf(adev))
4004                 gfx_v9_0_init_golden_registers(adev);
4005
4006         gfx_v9_0_constants_init(adev);
4007
4008         gfx_v9_0_init_tcp_config(adev);
4009
4010         r = adev->gfx.rlc.funcs->resume(adev);
4011         if (r)
4012                 return r;
4013
4014         r = gfx_v9_0_cp_resume(adev);
4015         if (r)
4016                 return r;
4017
4018         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4019                 gfx_v9_4_2_set_power_brake_sequence(adev);
4020
4021         return r;
4022 }
4023
4024 static int gfx_v9_0_hw_fini(void *handle)
4025 {
4026         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4027
4028         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4029         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4030         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4031
4032         /* DF freeze and KCQ disable would fail after a RAS interrupt */
4033         if (!amdgpu_ras_intr_triggered())
4034                 /* disable the KCQ so the CPC stops touching memory that is no longer valid */
4035                 amdgpu_gfx_disable_kcq(adev);
4036
4037         if (amdgpu_sriov_vf(adev)) {
4038                 gfx_v9_0_cp_gfx_enable(adev, false);
4039                 /* polling must be disabled for SRIOV once the hw is finished,
4040                  * otherwise the CPC engine may keep fetching a WB address that
4041                  * is no longer valid after the sw side is done, triggering a
4042                  * DMAR read error on the hypervisor side.
4043                  */
4044                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4045                 return 0;
4046         }
4047
4048         /* Use the deinitialize sequence from CAIL when unbinding the device
4049          * from the driver, otherwise the KIQ hangs when binding it back.
4050          */
4051         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4052                 mutex_lock(&adev->srbm_mutex);
4053                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
4054                                 adev->gfx.kiq.ring.pipe,
4055                                 adev->gfx.kiq.ring.queue, 0);
4056                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
4057                 soc15_grbm_select(adev, 0, 0, 0, 0);
4058                 mutex_unlock(&adev->srbm_mutex);
4059         }
4060
4061         gfx_v9_0_cp_enable(adev, false);
4062
4063         /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4064         if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4065             (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
4066                 dev_dbg(adev->dev, "Skipping RLC halt\n");
4067                 return 0;
4068         }
4069
4070         adev->gfx.rlc.funcs->stop(adev);
4071         return 0;
4072 }
4073
4074 static int gfx_v9_0_suspend(void *handle)
4075 {
4076         return gfx_v9_0_hw_fini(handle);
4077 }
4078
4079 static int gfx_v9_0_resume(void *handle)
4080 {
4081         return gfx_v9_0_hw_init(handle);
4082 }
4083
4084 static bool gfx_v9_0_is_idle(void *handle)
4085 {
4086         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4087
4088         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4089                                 GRBM_STATUS, GUI_ACTIVE))
4090                 return false;
4091         else
4092                 return true;
4093 }
4094
4095 static int gfx_v9_0_wait_for_idle(void *handle)
4096 {
4097         unsigned i;
4098         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4099
4100         for (i = 0; i < adev->usec_timeout; i++) {
4101                 if (gfx_v9_0_is_idle(handle))
4102                         return 0;
4103                 udelay(1);
4104         }
4105         return -ETIMEDOUT;
4106 }
4107
4108 static int gfx_v9_0_soft_reset(void *handle)
4109 {
4110         u32 grbm_soft_reset = 0;
4111         u32 tmp;
4112         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4113
4114         /* GRBM_STATUS */
4115         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4116         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4117                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4118                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4119                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4120                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4121                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4122                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4123                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4124                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4125                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4126         }
4127
4128         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4129                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4130                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4131         }
4132
4133         /* GRBM_STATUS2 */
4134         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4135         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4136                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4137                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4138
4140         if (grbm_soft_reset) {
4141                 /* stop the rlc */
4142                 adev->gfx.rlc.funcs->stop(adev);
4143
4144                 if (adev->gfx.num_gfx_rings)
4145                         /* Disable GFX parsing/prefetching */
4146                         gfx_v9_0_cp_gfx_enable(adev, false);
4147
4148                 /* Disable MEC parsing/prefetching */
4149                 gfx_v9_0_cp_compute_enable(adev, false);
4150
4151                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4152                 tmp |= grbm_soft_reset;
4153                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4154                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4155                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4156 
4157                 udelay(50);
4158 
4159                 tmp &= ~grbm_soft_reset;
4160                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4161                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4164
4165                 /* Wait a little for things to settle down */
4166                 udelay(50);
4167         }
4168         return 0;
4169 }
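
/*
 * A standalone sketch (illustration only) of the reset pulse above: OR
 * the SOFT_RESET_* bits into the register, read it back to post the
 * write, hold for ~50 us, then clear the same bits and read back again.
 */
static void reset_pulse_example(uint32_t mask,
                                uint32_t (*rd)(void),
                                void (*wr)(uint32_t))
{
        uint32_t tmp = rd();

        wr(tmp | mask);                 /* assert the selected reset bits */
        (void)rd();                     /* read back to flush the write */
        udelay(50);
        wr(tmp & ~mask);                /* deassert */
        (void)rd();
}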
4170
4171 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4172 {
4173         signed long r, cnt = 0;
4174         unsigned long flags;
4175         uint32_t seq, reg_val_offs = 0;
4176         uint64_t value = 0;
4177         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4178         struct amdgpu_ring *ring = &kiq->ring;
4179
4180         BUG_ON(!ring->funcs->emit_rreg);
4181
4182         spin_lock_irqsave(&kiq->ring_lock, flags);
4183         if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4184                 pr_err("critical bug! too many kiq readers\n");
4185                 goto failed_unlock;
4186         }
4187         amdgpu_ring_alloc(ring, 32);
4188         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4189         amdgpu_ring_write(ring, 9 |     /* src: register*/
4190                                 (5 << 8) |      /* dst: memory */
4191                                 (1 << 16) |     /* count sel */
4192                                 (1 << 20));     /* write confirm */
4193         amdgpu_ring_write(ring, 0);
4194         amdgpu_ring_write(ring, 0);
4195         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4196                                 reg_val_offs * 4));
4197         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4198                                 reg_val_offs * 4));
4199         r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4200         if (r)
4201                 goto failed_undo;
4202
4203         amdgpu_ring_commit(ring);
4204         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4205
4206         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4207
4208         /* Don't keep waiting in the gpu-reset case, because doing so may
4209          * block the gpu_recover() routine forever; e.g. when this KIQ read
4210          * is triggered from TTM, ttm_bo_lock_delayed_workqueue() never
4211          * returns while we keep waiting here, which makes gpu_recover()
4212          * hang.
4213          *
4214          * Also don't keep waiting when called from IRQ context.
4215          */
4216         if (r < 1 && amdgpu_in_reset(adev))
4217                 goto failed_kiq_read;
4218
4219         might_sleep();
4220         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4221                 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4222                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4223         }
4224
4225         if (cnt > MAX_KIQ_REG_TRY)
4226                 goto failed_kiq_read;
4227
4228         mb();
4229         value = (uint64_t)adev->wb.wb[reg_val_offs] |
4230                 (uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4231         amdgpu_device_wb_free(adev, reg_val_offs);
4232         return value;
4233
4234 failed_undo:
4235         amdgpu_ring_undo(ring);
4236 failed_unlock:
4237         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4238 failed_kiq_read:
4239         if (reg_val_offs)
4240                 amdgpu_device_wb_free(adev, reg_val_offs);
4241         pr_err("failed to read gpu clock\n");
4242         return ~0;
4243 }
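
/*
 * A standalone sketch (illustration only) of how the two consecutive
 * writeback dwords filled by the COPY_DATA packet above are combined
 * into the 64-bit value returned at the end of the function.
 */
static inline uint64_t wb_read_u64_example(const volatile uint32_t *wb,
                                           uint32_t offs)
{
        return (uint64_t)wb[offs] | ((uint64_t)wb[offs + 1] << 32);
}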
4244
4245 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4246 {
4247         uint64_t clock, clock_lo, clock_hi, hi_check;
4248
4249         switch (adev->ip_versions[GC_HWIP][0]) {
4250         case IP_VERSION(9, 3, 0):
4251                 preempt_disable();
4252                 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4253                 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4254                 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4255                 /* The SMUIO TSC clock runs at 100 MHz, so the 32-bit low word
4256                  * carries over into the high word roughly every 42 seconds.
4257                  */
4258                 if (hi_check != clock_hi) {
4259                         clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4260                         clock_hi = hi_check;
4261                 }
4262                 preempt_enable();
4263                 clock = clock_lo | (clock_hi << 32ULL);
4264                 break;
4265         default:
4266                 amdgpu_gfx_off_ctrl(adev, false);
4267                 mutex_lock(&adev->gfx.gpu_clock_mutex);
4268                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4269                         clock = gfx_v9_0_kiq_read_clock(adev);
4270                 } else {
4271                         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4272                         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4273                                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4274                 }
4275                 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4276                 amdgpu_gfx_off_ctrl(adev, true);
4277                 break;
4278         }
4279         return clock;
4280 }
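
/*
 * A standalone sketch (illustration only) of the hi/lo/hi sequence used
 * for the Renoir TSC above: if the high word changed between the two
 * reads, the low word rolled over in between, so it is re-read to pair
 * it with the new high word.
 */
static uint64_t read_split_counter_example(uint32_t (*read_hi)(void),
                                           uint32_t (*read_lo)(void))
{
        uint32_t hi = read_hi();
        uint32_t lo = read_lo();
        uint32_t hi_check = read_hi();

        if (hi_check != hi) {           /* carry happened mid-read */
                lo = read_lo();
                hi = hi_check;
        }
        return ((uint64_t)hi << 32) | lo;
}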
4281
4282 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4283                                           uint32_t vmid,
4284                                           uint32_t gds_base, uint32_t gds_size,
4285                                           uint32_t gws_base, uint32_t gws_size,
4286                                           uint32_t oa_base, uint32_t oa_size)
4287 {
4288         struct amdgpu_device *adev = ring->adev;
4289
4290         /* GDS Base */
4291         gfx_v9_0_write_data_to_reg(ring, 0, false,
4292                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4293                                    gds_base);
4294
4295         /* GDS Size */
4296         gfx_v9_0_write_data_to_reg(ring, 0, false,
4297                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4298                                    gds_size);
4299
4300         /* GWS */
4301         gfx_v9_0_write_data_to_reg(ring, 0, false,
4302                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4303                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4304
4305         /* OA */
4306         gfx_v9_0_write_data_to_reg(ring, 0, false,
4307                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4308                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4309 }
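
/*
 * A standalone sketch (illustration only) of the OA expression above:
 * (1 << (base + size)) - (1 << base) yields a contiguous run of "size"
 * set bits starting at bit "base", e.g. base = 2, size = 3 -> 0b11100.
 */
static inline uint32_t oa_mask_example(uint32_t oa_base, uint32_t oa_size)
{
        return (1u << (oa_size + oa_base)) - (1u << oa_base);
}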
4310
4311 static const u32 vgpr_init_compute_shader[] =
4312 {
4313         0xb07c0000, 0xbe8000ff,
4314         0x000000f8, 0xbf110800,
4315         0x7e000280, 0x7e020280,
4316         0x7e040280, 0x7e060280,
4317         0x7e080280, 0x7e0a0280,
4318         0x7e0c0280, 0x7e0e0280,
4319         0x80808800, 0xbe803200,
4320         0xbf84fff5, 0xbf9c0000,
4321         0xd28c0001, 0x0001007f,
4322         0xd28d0001, 0x0002027e,
4323         0x10020288, 0xb8810904,
4324         0xb7814000, 0xd1196a01,
4325         0x00000301, 0xbe800087,
4326         0xbefc00c1, 0xd89c4000,
4327         0x00020201, 0xd89cc080,
4328         0x00040401, 0x320202ff,
4329         0x00000800, 0x80808100,
4330         0xbf84fff8, 0x7e020280,
4331         0xbf810000, 0x00000000,
4332 };
4333
4334 static const u32 sgpr_init_compute_shader[] =
4335 {
4336         0xb07c0000, 0xbe8000ff,
4337         0x0000005f, 0xbee50080,
4338         0xbe812c65, 0xbe822c65,
4339         0xbe832c65, 0xbe842c65,
4340         0xbe852c65, 0xb77c0005,
4341         0x80808500, 0xbf84fff8,
4342         0xbe800080, 0xbf810000,
4343 };
4344
4345 static const u32 vgpr_init_compute_shader_arcturus[] = {
4346         0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4347         0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4348         0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4349         0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4350         0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4351         0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4352         0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4353         0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4354         0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4355         0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4356         0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4357         0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4358         0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4359         0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4360         0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4361         0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4362         0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4363         0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4364         0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4365         0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4366         0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4367         0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4368         0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4369         0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4370         0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4371         0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4372         0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4373         0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4374         0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4375         0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4376         0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4377         0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4378         0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4379         0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4380         0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4381         0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4382         0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4383         0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4384         0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4385         0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4386         0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4387         0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4388         0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4389         0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4390         0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4391         0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4392         0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4393         0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4394         0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4395         0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4396         0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4397         0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4398         0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4399         0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4400         0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4401         0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4402         0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4403         0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4404         0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4405         0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4406         0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4407         0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4408         0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4409         0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4410         0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4411         0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4412         0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4413         0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4414         0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4415         0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4416         0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4417         0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4418         0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4419         0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4420         0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4421         0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4422         0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4423         0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4424         0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4425         0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4426         0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4427         0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4428         0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4429         0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4430         0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4431         0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4432         0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4433         0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4434         0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4435         0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4436         0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4437         0xbf84fff8, 0xbf810000,
4438 };
4439
4440 /* When the register arrays below change, please update gpr_reg_size
4441  * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
4442  * so that all gfx9 ASICs stay covered. */
4443 static const struct soc15_reg_entry vgpr_init_regs[] = {
4444    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4445    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4446    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4447    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4448    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4449    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4450    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4451    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4452    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4453    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4454    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4455    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4456    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4457    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4458 };
4459
4460 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4461    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4462    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4463    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4464    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4465    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4466    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4467    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4468    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4469    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4470    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4471    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4472    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4473    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4474    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4475 };
4476
4477 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4478    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4479    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4480    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4481    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4482    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4483    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4484    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4485    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4486    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4487    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4488    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4489    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4490    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4491    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4492 };
4493
4494 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4495    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4496    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4497    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4498    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4499    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4500    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4501    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4502    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4503    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4504    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4505    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4506    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4507    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4508    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4509 };
4510
4511 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4512    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4513    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4514    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4515    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4516    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4517    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4518    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4519    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4520    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4521    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4522    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4523    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4524    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4525    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4526    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4527    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4528    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4529    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4530    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4531    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4532    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4533    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4534    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4535    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4536    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4537    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4538    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4539    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4540    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4541    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4542    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4543    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4544    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4545 };
4546
4547 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4548 {
4549         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4550         int i, r;
4551
4552         /* only supported when RAS is enabled */
4553         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4554                 return 0;
4555
4556         r = amdgpu_ring_alloc(ring, 7);
4557         if (r) {
4558                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4559                         ring->name, r);
4560                 return r;
4561         }
4562
4563         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4564         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4565
4566         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4567         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4568                                 PACKET3_DMA_DATA_DST_SEL(1) |
4569                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4570                                 PACKET3_DMA_DATA_ENGINE(0)));
4571         amdgpu_ring_write(ring, 0);
4572         amdgpu_ring_write(ring, 0);
4573         amdgpu_ring_write(ring, 0);
4574         amdgpu_ring_write(ring, 0);
4575         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4576                                 adev->gds.gds_size);
4577
4578         amdgpu_ring_commit(ring);
4579
4580         for (i = 0; i < adev->usec_timeout; i++) {
4581                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4582                         break;
4583                 udelay(1);
4584         }
4585
4586         if (i >= adev->usec_timeout)
4587                 r = -ETIMEDOUT;
4588
4589         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4590
4591         return r;
4592 }
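
/*
 * A standalone sketch (illustration only) of the PM4 type-3 header the
 * PACKET3() macro builds, per the usual PM4 convention: bits 31:30 hold
 * the packet type (3), bits 29:16 the body dword count minus one, and
 * bits 15:8 the opcode.  PACKET3(PACKET3_DMA_DATA, 5) above announces
 * the six body dwords that follow it.
 */
static inline uint32_t pm4_type3_header_example(uint32_t opcode,
                                                uint32_t count)
{
        return (3u << 30) | ((count & 0x3fff) << 16) | ((opcode & 0xff) << 8);
}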
4593
4594 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4595 {
4596         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4597         struct amdgpu_ib ib;
4598         struct dma_fence *f = NULL;
4599         int r, i;
4600         unsigned total_size, vgpr_offset, sgpr_offset;
4601         u64 gpu_addr;
4602
4603         int compute_dim_x = adev->gfx.config.max_shader_engines *
4604                                                 adev->gfx.config.max_cu_per_sh *
4605                                                 adev->gfx.config.max_sh_per_se;
4606         int sgpr_work_group_size = 5;
4607         int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4608         int vgpr_init_shader_size;
4609         const u32 *vgpr_init_shader_ptr;
4610         const struct soc15_reg_entry *vgpr_init_regs_ptr;
4611
4612         /* only supported when RAS is enabled */
4613         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4614                 return 0;
4615
4616         /* bail if the compute ring is not ready */
4617         if (!ring->sched.ready)
4618                 return 0;
4619
4620         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4621                 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4622                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4623                 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4624         } else {
4625                 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4626                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4627                 vgpr_init_regs_ptr = vgpr_init_regs;
4628         }
4629
4630         total_size =
4631                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4632         total_size +=
4633                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4634         total_size +=
4635                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4636         total_size = ALIGN(total_size, 256);
4637         vgpr_offset = total_size;
4638         total_size += ALIGN(vgpr_init_shader_size, 256);
4639         sgpr_offset = total_size;
4640         total_size += sizeof(sgpr_init_compute_shader);
4641
4642         /* allocate an indirect buffer to put the commands in */
4643         memset(&ib, 0, sizeof(ib));
4644         r = amdgpu_ib_get(adev, NULL, total_size,
4645                                         AMDGPU_IB_POOL_DIRECT, &ib);
4646         if (r) {
4647                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4648                 return r;
4649         }
4650
4651         /* load the compute shaders */
4652         for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4653                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4654
4655         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4656                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4657
4658         /* init the ib length to 0 */
4659         ib.length_dw = 0;
4660
4661         /* VGPR */
4662         /* write the register state for the compute dispatch */
4663         for (i = 0; i < gpr_reg_size; i++) {
4664                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4665                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4666                                                                 - PACKET3_SET_SH_REG_START;
4667                 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4668         }
4669         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4670         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4671         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4672         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4673                                                         - PACKET3_SET_SH_REG_START;
4674         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4675         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4676
4677         /* write dispatch packet */
4678         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4679         ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4680         ib.ptr[ib.length_dw++] = 1; /* y */
4681         ib.ptr[ib.length_dw++] = 1; /* z */
4682         ib.ptr[ib.length_dw++] =
4683                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4684
4685         /* write CS partial flush packet */
4686         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4687         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4688
4689         /* SGPR1 */
4690         /* write the register state for the compute dispatch */
4691         for (i = 0; i < gpr_reg_size; i++) {
4692                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4693                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4694                                                                 - PACKET3_SET_SH_REG_START;
4695                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4696         }
4697         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4698         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4699         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4700         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4701                                                         - PACKET3_SET_SH_REG_START;
4702         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4703         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4704
4705         /* write dispatch packet */
4706         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4707         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4708         ib.ptr[ib.length_dw++] = 1; /* y */
4709         ib.ptr[ib.length_dw++] = 1; /* z */
4710         ib.ptr[ib.length_dw++] =
4711                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4712
4713         /* write CS partial flush packet */
4714         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4715         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4716
4717         /* SGPR2 */
4718         /* write the register state for the compute dispatch */
4719         for (i = 0; i < gpr_reg_size; i++) {
4720                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4721                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4722                                                                 - PACKET3_SET_SH_REG_START;
4723                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4724         }
4725         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4726         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4727         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4728         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4729                                                         - PACKET3_SET_SH_REG_START;
4730         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4731         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4732
4733         /* write dispatch packet */
4734         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4735         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4736         ib.ptr[ib.length_dw++] = 1; /* y */
4737         ib.ptr[ib.length_dw++] = 1; /* z */
4738         ib.ptr[ib.length_dw++] =
4739                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4740
4741         /* write CS partial flush packet */
4742         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4743         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4744
4745         /* schedule the IB on the ring */
4746         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4747         if (r) {
4748                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4749                 goto fail;
4750         }
4751
4752         /* wait for the GPU to finish processing the IB */
4753         r = dma_fence_wait(f, false);
4754         if (r) {
4755                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4756                 goto fail;
4757         }
4758
4759 fail:
4760         amdgpu_ib_free(adev, &ib, NULL);
4761         dma_fence_put(f);
4762
4763         return r;
4764 }
4765
4766 static int gfx_v9_0_early_init(void *handle)
4767 {
4768         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4769
4770         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4771             adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4772                 adev->gfx.num_gfx_rings = 0;
4773         else
4774                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4775         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4776                                           AMDGPU_MAX_COMPUTE_RINGS);
4777         gfx_v9_0_set_kiq_pm4_funcs(adev);
4778         gfx_v9_0_set_ring_funcs(adev);
4779         gfx_v9_0_set_irq_funcs(adev);
4780         gfx_v9_0_set_gds_init(adev);
4781         gfx_v9_0_set_rlc_funcs(adev);
4782
4783         return 0;
4784 }
4785
4786 static int gfx_v9_0_ecc_late_init(void *handle)
4787 {
4788         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4789         int r;
4790
4791         /*
4792          * Temporary workaround: on several cards the CP firmware fails
4793          * to update the read pointer while CPDMA writes the clearing
4794          * operation to GDS during the suspend/resume sequence, so limit
4795          * this operation to the cold boot sequence.
4796          */
4797         if ((!adev->in_suspend) &&
4798             (adev->gds.gds_size)) {
4799                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4800                 if (r)
4801                         return r;
4802         }
4803
4804         /* requires IBs so do in late init after IB pool is initialized */
4805         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4806                 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4807         else
4808                 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4809
4810         if (r)
4811                 return r;
4812
4813         if (adev->gfx.ras_funcs &&
4814             adev->gfx.ras_funcs->ras_late_init) {
4815                 r = adev->gfx.ras_funcs->ras_late_init(adev);
4816                 if (r)
4817                         return r;
4818         }
4819
4820         if (adev->gfx.ras_funcs &&
4821             adev->gfx.ras_funcs->enable_watchdog_timer)
4822                 adev->gfx.ras_funcs->enable_watchdog_timer(adev);
4823
4824         return 0;
4825 }
4826
4827 static int gfx_v9_0_late_init(void *handle)
4828 {
4829         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4830         int r;
4831
4832         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4833         if (r)
4834                 return r;
4835
4836         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4837         if (r)
4838                 return r;
4839
4840         r = gfx_v9_0_ecc_late_init(handle);
4841         if (r)
4842                 return r;
4843
4844         return 0;
4845 }
4846
4847 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4848 {
4849         uint32_t rlc_setting;
4850
4851         /* if RLC is not enabled, do nothing */
4852         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4853         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4854                 return false;
4855
4856         return true;
4857 }
4858
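/*
 * Ask the RLC to enter safe mode: write CMD with MESSAGE = 1 to
 * RLC_SAFE_MODE, then poll until the RLC acknowledges by clearing
 * the CMD field (or the usec timeout expires).
 */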
4859 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4860 {
4861         uint32_t data;
4862         unsigned i;
4863
4864         data = RLC_SAFE_MODE__CMD_MASK;
4865         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4866         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4867
4868         /* wait for RLC_SAFE_MODE */
4869         for (i = 0; i < adev->usec_timeout; i++) {
4870                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4871                         break;
4872                 udelay(1);
4873         }
4874 }
4875
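/* leave safe mode: CMD with MESSAGE = 0; no acknowledgement is polled */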
4876 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4877 {
4878         uint32_t data;
4879
4880         data = RLC_SAFE_MODE__CMD_MASK;
4881         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4882 }
4883
4884 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4885                                                 bool enable)
4886 {
4887         amdgpu_gfx_rlc_enter_safe_mode(adev);
4888
4889         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4890                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4891                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4892                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4893         } else {
4894                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4895                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4896                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4897         }
4898
4899         amdgpu_gfx_rlc_exit_safe_mode(adev);
4900 }
4901
4902 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4903                                                 bool enable)
4904 {
4905         /* TODO: double check if we need to perform under safe mode */
4906         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4907
4908         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4909                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4910         else
4911                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4912
4913         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4914                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4915         else
4916                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4917
4918         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4919 }
4920
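/*
 * Toggle medium grain clock gating (MGCG) and memory light sleep
 * (MGLS) under RLC safe mode by programming RLC_CGTT_MGCG_OVERRIDE
 * and the RLC/CP memory sleep controls.
 */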
4921 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4922                                                       bool enable)
4923 {
4924         uint32_t data, def;
4925
4926         amdgpu_gfx_rlc_enter_safe_mode(adev);
4927
4928         /* It is disabled by HW by default */
4929         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4930                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4931                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4932
4933                 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4934                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4935
4936                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4937                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4938                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4939
4940                 /* only for Vega10 & Raven1 */
4941                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4942
4943                 if (def != data)
4944                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4945
4946                 /* MGLS is a global flag to control all MGLS in GFX */
4947                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4948                         /* 2 - RLC memory Light sleep */
4949                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4950                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4951                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4952                                 if (def != data)
4953                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4954                         }
4955                         /* 3 - CP memory Light sleep */
4956                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4957                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4958                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4959                                 if (def != data)
4960                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4961                         }
4962                 }
4963         } else {
4964                 /* 1 - MGCG_OVERRIDE */
4965                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4966
4967                 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4968                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4969
4970                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4971                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4972                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4973                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4974
4975                 if (def != data)
4976                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4977
4978                 /* 2 - disable MGLS in RLC */
4979                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4980                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4981                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4982                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4983                 }
4984
4985                 /* 3 - disable MGLS in CP */
4986                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4987                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4988                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4989                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4990                 }
4991         }
4992
4993         amdgpu_gfx_rlc_exit_safe_mode(adev);
4994 }
4995
4996 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4997                                            bool enable)
4998 {
4999         uint32_t data, def;
5000
5001         if (!adev->gfx.num_gfx_rings)
5002                 return;
5003
5004         amdgpu_gfx_rlc_enter_safe_mode(adev);
5005
5006         /* Enable 3D CGCG/CGLS */
5007         if (enable) {
5008                 /* write the command to clear the CGCG/CGLS override */
5009                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5010                 /* unset CGCG override */
5011                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5012                 /* update CGCG and CGLS override bits */
5013                 if (def != data)
5014                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5015
5016                 /* enable 3D CGCG FSM (0x0000363f) */
5017                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5018
5019                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5020                         data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5021                                 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5022                 else
5023                         data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
5024
5025                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5026                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5027                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5028                 if (def != data)
5029                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5030
5031                 /* set IDLE_POLL_COUNT(0x00900100) */
5032                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5033                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5034                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5035                 if (def != data)
5036                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5037         } else {
5038                 /* Disable CGCG/CGLS */
5039                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5040                 /* disable CGCG; CGLS should be disabled as well */
5041                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5042                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5043                 /* disable cgcg and cgls in FSM */
5044                 if (def != data)
5045                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5046         }
5047
5048         amdgpu_gfx_rlc_exit_safe_mode(adev);
5049 }
5050
5051 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5052                                                       bool enable)
5053 {
5054         uint32_t def, data;
5055
5056         amdgpu_gfx_rlc_enter_safe_mode(adev);
5057
5058         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5059                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5060                 /* unset CGCG override */
5061                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5062                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5063                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5064                 else
5065                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5066                 /* update CGCG and CGLS override bits */
5067                 if (def != data)
5068                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5069
5070                 /* enable CGCG FSM (0x0000363F) */
5071                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5072
5073                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
5074                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5075                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5076                 else
5077                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5078                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5079                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5080                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5081                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5082                 if (def != data)
5083                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5084
5085                 /* set IDLE_POLL_COUNT(0x00900100) */
5086                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5087                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5088                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5089                 if (def != data)
5090                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5091         } else {
5092                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5093                 /* reset CGCG/CGLS bits */
5094                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5095                 /* disable cgcg and cgls in FSM */
5096                 if (def != data)
5097                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5098         }
5099
5100         amdgpu_gfx_rlc_exit_safe_mode(adev);
5101 }
5102
5103 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5104                                             bool enable)
5105 {
5106         if (enable) {
5107                 /* CGCG/CGLS should be enabled after MGCG/MGLS
5108                  * ===  MGCG + MGLS ===
5109                  */
5110                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5111                 /* ===  CGCG /CGLS for GFX 3D Only === */
5112                 gfx_v9_0_update_3d_clock_gating(adev, enable);
5113                 /* ===  CGCG + CGLS === */
5114                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5115         } else {
5116                 /* CGCG/CGLS should be disabled before MGCG/MGLS
5117                  * ===  CGCG + CGLS ===
5118                  */
5119                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5120                 /* ===  CGCG /CGLS for GFX 3D Only === */
5121                 gfx_v9_0_update_3d_clock_gating(adev, enable);
5122                 /* ===  MGCG + MGLS === */
5123                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5124         }
5125         return 0;
5126 }
5127
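/*
 * Select the VMID sampled by the RLC streaming performance monitor
 * (SPM); GFXOFF is blocked around the access so the register write
 * lands while the GFX block is powered up.
 */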
5128 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5129 {
5130         u32 reg, data;
5131
5132         amdgpu_gfx_off_ctrl(adev, false);
5133
5134         reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5135         if (amdgpu_sriov_is_pp_one_vf(adev))
5136                 data = RREG32_NO_KIQ(reg);
5137         else
5138                 data = RREG32(reg);
5139
5140         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5141         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5142
5143         if (amdgpu_sriov_is_pp_one_vf(adev))
5144                 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5145         else
5146                 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5147
5148         amdgpu_gfx_off_ctrl(adev, true);
5149 }
5150
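/* return true if @offset matches one of the RLCG-interfaced registers in @entries */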
5151 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5152                                         uint32_t offset,
5153                                         struct soc15_reg_rlcg *entries, int arr_size)
5154 {
5155         int i;
5156         uint32_t reg;
5157
5158         if (!entries)
5159                 return false;
5160
5161         for (i = 0; i < arr_size; i++) {
5162                 const struct soc15_reg_rlcg *entry;
5163
5164                 entry = &entries[i];
5165                 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5166                 if (offset == reg)
5167                         return true;
5168         }
5169
5170         return false;
5171 }
5172
5173 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5174 {
5175         return gfx_v9_0_check_rlcg_range(adev, offset,
5176                                         (void *)rlcg_access_gc_9_0,
5177                                         ARRAY_SIZE(rlcg_access_gc_9_0));
5178 }
5179
5180 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5181         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5182         .set_safe_mode = gfx_v9_0_set_safe_mode,
5183         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5184         .init = gfx_v9_0_rlc_init,
5185         .get_csb_size = gfx_v9_0_get_csb_size,
5186         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5187         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5188         .resume = gfx_v9_0_rlc_resume,
5189         .stop = gfx_v9_0_rlc_stop,
5190         .reset = gfx_v9_0_rlc_reset,
5191         .start = gfx_v9_0_rlc_start,
5192         .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5193         .sriov_wreg = gfx_v9_0_sriov_wreg,
5194         .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5195 };
5196
5197 static int gfx_v9_0_set_powergating_state(void *handle,
5198                                           enum amd_powergating_state state)
5199 {
5200         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5201         bool enable = (state == AMD_PG_STATE_GATE);
5202
5203         switch (adev->ip_versions[GC_HWIP][0]) {
5204         case IP_VERSION(9, 2, 2):
5205         case IP_VERSION(9, 1, 0):
5206         case IP_VERSION(9, 3, 0):
5207                 if (!enable)
5208                         amdgpu_gfx_off_ctrl(adev, false);
5209
5210                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5211                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5212                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5213                 } else {
5214                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5215                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5216                 }
5217
5218                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5219                         gfx_v9_0_enable_cp_power_gating(adev, true);
5220                 else
5221                         gfx_v9_0_enable_cp_power_gating(adev, false);
5222
5223                 /* update gfx cgpg state */
5224                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5225
5226                 /* update mgcg state */
5227                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5228
5229                 if (enable)
5230                         amdgpu_gfx_off_ctrl(adev, true);
5231                 break;
5232         case IP_VERSION(9, 2, 1):
5233                 amdgpu_gfx_off_ctrl(adev, enable);
5234                 break;
5235         default:
5236                 break;
5237         }
5238
5239         return 0;
5240 }
5241
5242 static int gfx_v9_0_set_clockgating_state(void *handle,
5243                                           enum amd_clockgating_state state)
5244 {
5245         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5246
5247         if (amdgpu_sriov_vf(adev))
5248                 return 0;
5249
5250         switch (adev->ip_versions[GC_HWIP][0]) {
5251         case IP_VERSION(9, 0, 1):
5252         case IP_VERSION(9, 2, 1):
5253         case IP_VERSION(9, 4, 0):
5254         case IP_VERSION(9, 2, 2):
5255         case IP_VERSION(9, 1, 0):
5256         case IP_VERSION(9, 4, 1):
5257         case IP_VERSION(9, 3, 0):
5258         case IP_VERSION(9, 4, 2):
5259                 gfx_v9_0_update_gfx_clock_gating(adev,
5260                                                  state == AMD_CG_STATE_GATE);
5261                 break;
5262         default:
5263                 break;
5264         }
5265         return 0;
5266 }
5267
5268 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5269 {
5270         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5271         int data;
5272
5273         if (amdgpu_sriov_vf(adev))
5274                 *flags = 0;
5275
5276         /* AMD_CG_SUPPORT_GFX_MGCG */
5277         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5278         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5279                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5280
5281         /* AMD_CG_SUPPORT_GFX_CGCG */
5282         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5283         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5284                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5285
5286         /* AMD_CG_SUPPORT_GFX_CGLS */
5287         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5288                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5289
5290         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5291         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5292         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5293                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5294
5295         /* AMD_CG_SUPPORT_GFX_CP_LS */
5296         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5297         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5298                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5299
5300         if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5301                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5302                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5303                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5304                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5305
5306                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5307                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5308                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5309         }
5310 }
5311
5312 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5313 {
5314         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 uses a 32-bit rptr */
5315 }
5316
5317 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5318 {
5319         struct amdgpu_device *adev = ring->adev;
5320         u64 wptr;
5321
5322         /* XXX check if swapping is necessary on BE */
5323         if (ring->use_doorbell) {
5324                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5325         } else {
5326                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5327                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5328         }
5329
5330         return wptr;
5331 }
5332
5333 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5334 {
5335         struct amdgpu_device *adev = ring->adev;
5336
5337         if (ring->use_doorbell) {
5338                 /* XXX check if swapping is necessary on BE */
5339                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5340                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5341         } else {
5342                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5343                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5344         }
5345 }
5346
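/*
 * Emit an HDP flush: pick this ring's CP engine bit in the NBIO flush
 * registers, then use WAIT_REG_MEM to write the request bit and poll
 * the matching done bit.
 */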
5347 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5348 {
5349         struct amdgpu_device *adev = ring->adev;
5350         u32 ref_and_mask, reg_mem_engine;
5351         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5352
5353         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5354                 switch (ring->me) {
5355                 case 1:
5356                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5357                         break;
5358                 case 2:
5359                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5360                         break;
5361                 default:
5362                         return;
5363                 }
5364                 reg_mem_engine = 0;
5365         } else {
5366                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5367                 reg_mem_engine = 1; /* pfp */
5368         }
5369
5370         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5371                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5372                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5373                               ref_and_mask, ref_and_mask, 0x20);
5374 }
5375
5376 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5377                                         struct amdgpu_job *job,
5378                                         struct amdgpu_ib *ib,
5379                                         uint32_t flags)
5380 {
5381         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5382         u32 header, control = 0;
5383
5384         if (ib->flags & AMDGPU_IB_FLAG_CE)
5385                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5386         else
5387                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5388
5389         control |= ib->length_dw | (vmid << 24);
5390
5391         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5392                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5393
5394                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5395                         gfx_v9_0_ring_emit_de_meta(ring);
5396         }
5397
5398         amdgpu_ring_write(ring, header);
5399         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5400         amdgpu_ring_write(ring,
5401 #ifdef __BIG_ENDIAN
5402                 (2 << 0) |
5403 #endif
5404                 lower_32_bits(ib->gpu_addr));
5405         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5406         amdgpu_ring_write(ring, control);
5407 }
5408
5409 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5410                                           struct amdgpu_job *job,
5411                                           struct amdgpu_ib *ib,
5412                                           uint32_t flags)
5413 {
5414         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5415         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5416
5417         /* Currently there is a high probability of a wave ID mismatch
5418          * between ME and GDS, leading to a HW deadlock, because ME generates
5419          * different wave IDs than the GDS expects. This situation happens
5420          * randomly when at least 5 compute pipes use GDS ordered append.
5421          * The wave IDs generated by ME are also wrong after suspend/resume.
5422          * Those are probably bugs somewhere else in the kernel driver.
5423          *
5424          * Writing GDS_COMPUTE_MAX_WAVE_ID resets the wave ID counters in ME
5425          * and GDS to 0 for this ring (me/pipe).
5426          */
5427         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5428                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5429                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5430                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5431         }
5432
5433         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5434         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5435         amdgpu_ring_write(ring,
5436 #ifdef __BIG_ENDIAN
5437                                 (2 << 0) |
5438 #endif
5439                                 lower_32_bits(ib->gpu_addr));
5440         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5441         amdgpu_ring_write(ring, control);
5442 }
5443
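/*
 * Emit an end-of-pipe fence: a RELEASE_MEM packet that flushes GPU
 * caches, writes @seq to @addr as 32- or 64-bit data per @flags, and
 * optionally triggers an interrupt.
 */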
5444 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5445                                      u64 seq, unsigned flags)
5446 {
5447         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5448         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5449         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5450
5451         /* RELEASE_MEM - flush caches, send int */
5452         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5453         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5454                                                EOP_TC_NC_ACTION_EN) :
5455                                               (EOP_TCL1_ACTION_EN |
5456                                                EOP_TC_ACTION_EN |
5457                                                EOP_TC_WB_ACTION_EN |
5458                                                EOP_TC_MD_ACTION_EN)) |
5459                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5460                                  EVENT_INDEX(5)));
5461         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5462
5463         /*
5464          * The address must be Qword aligned for a 64-bit write, and Dword
5465          * aligned when only the low 32 bits are sent (high bits discarded).
5466          */
5467         if (write64bit)
5468                 BUG_ON(addr & 0x7);
5469         else
5470                 BUG_ON(addr & 0x3);
5471         amdgpu_ring_write(ring, lower_32_bits(addr));
5472         amdgpu_ring_write(ring, upper_32_bits(addr));
5473         amdgpu_ring_write(ring, lower_32_bits(seq));
5474         amdgpu_ring_write(ring, upper_32_bits(seq));
5475         amdgpu_ring_write(ring, 0);
5476 }
5477
5478 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5479 {
5480         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5481         uint32_t seq = ring->fence_drv.sync_seq;
5482         uint64_t addr = ring->fence_drv.gpu_addr;
5483
5484         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5485                               lower_32_bits(addr), upper_32_bits(addr),
5486                               seq, 0xffffffff, 4);
5487 }
5488
5489 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5490                                         unsigned vmid, uint64_t pd_addr)
5491 {
5492         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5493
5494         /* compute doesn't have PFP */
5495         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5496                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5497                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5498                 amdgpu_ring_write(ring, 0x0);
5499         }
5500 }
5501
5502 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5503 {
5504         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware uses a 32-bit rptr */
5505 }
5506
5507 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5508 {
5509         u64 wptr;
5510
5511         /* XXX check if swapping is necessary on BE */
5512         if (ring->use_doorbell)
5513                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5514         else
5515                 BUG();
5516         return wptr;
5517 }
5518
5519 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5520 {
5521         struct amdgpu_device *adev = ring->adev;
5522
5523         /* XXX check if swapping is necessary on BE */
5524         if (ring->use_doorbell) {
5525                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5526                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5527         } else {
5528                 BUG(); /* only the doorbell method is supported on gfx9 for now */
5529         }
5530 }
5531
5532 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5533                                          u64 seq, unsigned int flags)
5534 {
5535         struct amdgpu_device *adev = ring->adev;
5536
5537         /* we only allocate 32 bits for each seq writeback address */
5538         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5539
5540         /* write fence seq to the "addr" */
5541         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5542         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5543                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5544         amdgpu_ring_write(ring, lower_32_bits(addr));
5545         amdgpu_ring_write(ring, upper_32_bits(addr));
5546         amdgpu_ring_write(ring, lower_32_bits(seq));
5547
5548         if (flags & AMDGPU_FENCE_FLAG_INT) {
5549                 /* set register to trigger INT */
5550                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5551                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5552                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5553                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5554                 amdgpu_ring_write(ring, 0);
5555                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5556         }
5557 }
5558
5559 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5560 {
5561         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5562         amdgpu_ring_write(ring, 0);
5563 }
5564
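/*
 * Write a zeroed CE metadata block into the context save area (CSA).
 * Only emitted under SR-IOV (see gfx_v9_ring_emit_cntxcntl).
 */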
5565 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5566 {
5567         struct v9_ce_ib_state ce_payload = {0};
5568         uint64_t csa_addr;
5569         int cnt;
5570
5571         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5572         csa_addr = amdgpu_csa_vaddr(ring->adev);
5573
5574         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5575         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5576                                  WRITE_DATA_DST_SEL(8) |
5577                                  WR_CONFIRM) |
5578                                  WRITE_DATA_CACHE_POLICY(0));
5579         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5580         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5581         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5582 }
5583
5584 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5585 {
5586         struct v9_de_ib_state de_payload = {0};
5587         uint64_t csa_addr, gds_addr;
5588         int cnt;
5589
5590         csa_addr = amdgpu_csa_vaddr(ring->adev);
5591         gds_addr = csa_addr + 4096;
5592         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5593         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5594
5595         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5596         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5597         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5598                                  WRITE_DATA_DST_SEL(8) |
5599                                  WR_CONFIRM) |
5600                                  WRITE_DATA_CACHE_POLICY(0));
5601         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5602         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5603         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5604 }
5605
5606 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5607                                    bool secure)
5608 {
5609         uint32_t v = secure ? FRAME_TMZ : 0;
5610
5611         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5612         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5613 }
5614
5615 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5616 {
5617         uint32_t dw2 = 0;
5618
5619         if (amdgpu_sriov_vf(ring->adev))
5620                 gfx_v9_0_ring_emit_ce_meta(ring);
5621
5622         dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
5623         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5624                 /* set load_global_config & load_global_uconfig */
5625                 dw2 |= 0x8001;
5626                 /* set load_cs_sh_regs */
5627                 dw2 |= 0x01000000;
5628                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5629                 dw2 |= 0x10002;
5630
5631                 /* set load_ce_ram if a preamble is presented */
5632                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5633                         dw2 |= 0x10000000;
5634         } else {
5635                 /* still load_ce_ram if this is the first time a preamble is
5636                  * presented, even though no context switch happens.
5637                  */
5638                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5639                         dw2 |= 0x10000000;
5640         }
5641
5642         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5643         amdgpu_ring_write(ring, dw2);
5644         amdgpu_ring_write(ring, 0);
5645 }
5646
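/*
 * Open a conditional execution block: emit COND_EXEC with a dummy DW
 * count and return its ring offset so the real count can be patched
 * in later by gfx_v9_0_ring_emit_patch_cond_exec().
 */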
5647 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5648 {
5649         unsigned ret;
5650         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5651         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5652         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5653         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
5654         ret = ring->wptr & ring->buf_mask;
5655         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5656         return ret;
5657 }
5658
5659 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5660 {
5661         unsigned cur;
5662         BUG_ON(offset > ring->buf_mask);
5663         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5664
5665         cur = (ring->wptr & ring->buf_mask) - 1;
5666         if (likely(cur > offset))
5667                 ring->ring[offset] = cur - offset;
5668         else
5669                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5670 }
5671
5672 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5673                                     uint32_t reg_val_offs)
5674 {
5675         struct amdgpu_device *adev = ring->adev;
5676
5677         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5678         amdgpu_ring_write(ring, 0 |     /* src: register*/
5679                                 (5 << 8) |      /* dst: memory */
5680                                 (1 << 20));     /* write confirm */
5681         amdgpu_ring_write(ring, reg);
5682         amdgpu_ring_write(ring, 0);
5683         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5684                                 reg_val_offs * 4));
5685         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5686                                 reg_val_offs * 4));
5687 }
5688
5689 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5690                                     uint32_t val)
5691 {
5692         uint32_t cmd = 0;
5693
5694         switch (ring->funcs->type) {
5695         case AMDGPU_RING_TYPE_GFX:
5696                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5697                 break;
5698         case AMDGPU_RING_TYPE_KIQ:
5699                 cmd = (1 << 16); /* no inc addr */
5700                 break;
5701         default:
5702                 cmd = WR_CONFIRM;
5703                 break;
5704         }
5705         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5706         amdgpu_ring_write(ring, cmd);
5707         amdgpu_ring_write(ring, reg);
5708         amdgpu_ring_write(ring, 0);
5709         amdgpu_ring_write(ring, val);
5710 }
5711
5712 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5713                                         uint32_t val, uint32_t mask)
5714 {
5715         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5716 }
5717
5718 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5719                                                   uint32_t reg0, uint32_t reg1,
5720                                                   uint32_t ref, uint32_t mask)
5721 {
5722         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5723         struct amdgpu_device *adev = ring->adev;
5724         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5725                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5726
5727         if (fw_version_ok)
5728                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5729                                       ref, mask, 0x20);
5730         else
5731                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5732                                                            ref, mask);
5733 }
5734
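/* soft recovery: ask the SQ to kill the waves of the guilty VMID */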
5735 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5736 {
5737         struct amdgpu_device *adev = ring->adev;
5738         uint32_t value = 0;
5739
5740         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5741         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5742         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5743         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5744         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5745 }
5746
5747 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5748                                                  enum amdgpu_interrupt_state state)
5749 {
5750         switch (state) {
5751         case AMDGPU_IRQ_STATE_DISABLE:
5752         case AMDGPU_IRQ_STATE_ENABLE:
5753                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5754                                TIME_STAMP_INT_ENABLE,
5755                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5756                 break;
5757         default:
5758                 break;
5759         }
5760 }
5761
5762 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5763                                                      int me, int pipe,
5764                                                      enum amdgpu_interrupt_state state)
5765 {
5766         u32 mec_int_cntl, mec_int_cntl_reg;
5767
5768         /*
5769          * amdgpu controls only the first MEC. That's why this function only
5770          * handles the setting of interrupts for this specific MEC. All other
5771          * pipes' interrupts are set by amdkfd.
5772          */
5773
5774         if (me == 1) {
5775                 switch (pipe) {
5776                 case 0:
5777                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5778                         break;
5779                 case 1:
5780                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5781                         break;
5782                 case 2:
5783                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5784                         break;
5785                 case 3:
5786                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5787                         break;
5788                 default:
5789                         DRM_DEBUG("invalid pipe %d\n", pipe);
5790                         return;
5791                 }
5792         } else {
5793                 DRM_DEBUG("invalid me %d\n", me);
5794                 return;
5795         }
5796
5797         switch (state) {
5798         case AMDGPU_IRQ_STATE_DISABLE:
5799                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5800                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5801                                              TIME_STAMP_INT_ENABLE, 0);
5802                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5803                 break;
5804         case AMDGPU_IRQ_STATE_ENABLE:
5805                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5806                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5807                                              TIME_STAMP_INT_ENABLE, 1);
5808                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5809                 break;
5810         default:
5811                 break;
5812         }
5813 }
5814
5815 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5816                                              struct amdgpu_irq_src *source,
5817                                              unsigned type,
5818                                              enum amdgpu_interrupt_state state)
5819 {
5820         switch (state) {
5821         case AMDGPU_IRQ_STATE_DISABLE:
5822         case AMDGPU_IRQ_STATE_ENABLE:
5823                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5824                                PRIV_REG_INT_ENABLE,
5825                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5826                 break;
5827         default:
5828                 break;
5829         }
5830
5831         return 0;
5832 }
5833
5834 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5835                                               struct amdgpu_irq_src *source,
5836                                               unsigned type,
5837                                               enum amdgpu_interrupt_state state)
5838 {
5839         switch (state) {
5840         case AMDGPU_IRQ_STATE_DISABLE:
5841         case AMDGPU_IRQ_STATE_ENABLE:
5842                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5843                                PRIV_INSTR_INT_ENABLE,
5844                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5845                 break;
5846         default:
5847                 break;
5848         }
5849
5850         return 0;
5851 }
5852
5853 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5854         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5855                         CP_ECC_ERROR_INT_ENABLE, 1)
5856
5857 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5858         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5859                         CP_ECC_ERROR_INT_ENABLE, 0)
5860
5861 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5862                                               struct amdgpu_irq_src *source,
5863                                               unsigned type,
5864                                               enum amdgpu_interrupt_state state)
5865 {
5866         switch (state) {
5867         case AMDGPU_IRQ_STATE_DISABLE:
5868                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5869                                 CP_ECC_ERROR_INT_ENABLE, 0);
5870                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5871                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5872                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5873                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5874                 break;
5875
5876         case AMDGPU_IRQ_STATE_ENABLE:
5877                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5878                                 CP_ECC_ERROR_INT_ENABLE, 1);
5879                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5880                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5881                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5882                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5883                 break;
5884         default:
5885                 break;
5886         }
5887
5888         return 0;
5889 }
5890
5891
5892 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5893                                             struct amdgpu_irq_src *src,
5894                                             unsigned type,
5895                                             enum amdgpu_interrupt_state state)
5896 {
5897         switch (type) {
5898         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5899                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5900                 break;
5901         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5902                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5903                 break;
5904         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5905                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5906                 break;
5907         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5908                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5909                 break;
5910         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5911                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5912                 break;
5913         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5914                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5915                 break;
5916         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5917                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5918                 break;
5919         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5920                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5921                 break;
5922         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5923                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5924                 break;
5925         default:
5926                 break;
5927         }
5928         return 0;
5929 }
5930
5931 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5932                             struct amdgpu_irq_src *source,
5933                             struct amdgpu_iv_entry *entry)
5934 {
5935         int i;
5936         u8 me_id, pipe_id, queue_id;
5937         struct amdgpu_ring *ring;
5938
5939         DRM_DEBUG("IH: CP EOP\n");
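        /* ring_id encodes the interrupt source: pipe in bits [1:0],
         * ME in bits [3:2] and queue in bits [6:4], matching the
         * masks used below.
         */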
5940         me_id = (entry->ring_id & 0x0c) >> 2;
5941         pipe_id = (entry->ring_id & 0x03) >> 0;
5942         queue_id = (entry->ring_id & 0x70) >> 4;
5943
5944         switch (me_id) {
5945         case 0:
5946                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5947                 break;
5948         case 1:
5949         case 2:
5950                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5951                         ring = &adev->gfx.compute_ring[i];
5952                         /* Per-queue interrupt is supported for MEC starting from VI.
5953                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5954                          */
5955                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5956                                 amdgpu_fence_process(ring);
5957                 }
5958                 break;
5959         }
5960         return 0;
5961 }
5962
5963 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5964                            struct amdgpu_iv_entry *entry)
5965 {
5966         u8 me_id, pipe_id, queue_id;
5967         struct amdgpu_ring *ring;
5968         int i;
5969
5970         me_id = (entry->ring_id & 0x0c) >> 2;
5971         pipe_id = (entry->ring_id & 0x03) >> 0;
5972         queue_id = (entry->ring_id & 0x70) >> 4;
5973
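        /* Hand the fault to the GPU scheduler of the ring that owns the
         * offending queue so its recovery handling can run.
         */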
5974         switch (me_id) {
5975         case 0:
5976                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5977                 break;
5978         case 1:
5979         case 2:
5980                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5981                         ring = &adev->gfx.compute_ring[i];
5982                         if (ring->me == me_id && ring->pipe == pipe_id &&
5983                             ring->queue == queue_id)
5984                                 drm_sched_fault(&ring->sched);
5985                 }
5986                 break;
5987         }
5988 }
5989
5990 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5991                                  struct amdgpu_irq_src *source,
5992                                  struct amdgpu_iv_entry *entry)
5993 {
5994         DRM_ERROR("Illegal register access in command stream\n");
5995         gfx_v9_0_fault(adev, entry);
5996         return 0;
5997 }
5998
5999 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
6000                                   struct amdgpu_irq_src *source,
6001                                   struct amdgpu_iv_entry *entry)
6002 {
6003         DRM_ERROR("Illegal instruction in command stream\n");
6004         gfx_v9_0_fault(adev, entry);
6005         return 0;
6006 }
6007
6008
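/* EDC/ECC counter registers and, for each sub-block, the SEC (single error
 * corrected) and DED (double error detected) count fields inside them; a
 * zero mask/shift pair means the hardware exposes no DED count for that
 * sub-block.
 */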
6009 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6010         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6011           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6012           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6013         },
6014         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6015           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6016           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6017         },
6018         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6019           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6020           0, 0
6021         },
6022         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6023           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6024           0, 0
6025         },
6026         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6027           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6028           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6029         },
6030         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6031           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6032           0, 0
6033         },
6034         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6035           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6036           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6037         },
6038         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6039           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6040           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6041         },
6042         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6043           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6044           0, 0
6045         },
6046         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6047           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6048           0, 0
6049         },
6050         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6051           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6052           0, 0
6053         },
6054         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6055           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6056           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6057         },
6058         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6059           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6060           0, 0
6061         },
6062         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6063           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6064           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6065         },
6066         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6067           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6068           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6069           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6070         },
6071         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6072           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6073           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6074           0, 0
6075         },
6076         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6077           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6078           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6079           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6080         },
6081         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6082           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6083           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6084           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6085         },
6086         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6087           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6088           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6089           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6090         },
6091         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6092           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6093           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6094           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6095         },
6096         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6097           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6098           0, 0
6099         },
6100         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6101           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6102           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6103         },
6104         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6105           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6106           0, 0
6107         },
6108         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6109           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6110           0, 0
6111         },
6112         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6113           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6114           0, 0
6115         },
6116         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6117           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6118           0, 0
6119         },
6120         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6121           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6122           0, 0
6123         },
6124         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6125           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6126           0, 0
6127         },
6128         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6129           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6130           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6131         },
6132         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6133           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6134           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6135         },
6136         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6137           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6138           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6139         },
6140         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6141           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6142           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6143         },
6144         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6145           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6146           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6147         },
6148         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6149           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6150           0, 0
6151         },
6152         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6153           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6154           0, 0
6155         },
6156         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6157           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6158           0, 0
6159         },
6160         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6161           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6162           0, 0
6163         },
6164         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6165           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6166           0, 0
6167         },
6168         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6169           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6170           0, 0
6171         },
6172         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6173           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6174           0, 0
6175         },
6176         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6177           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6178           0, 0
6179         },
6180         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6181           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6182           0, 0
6183         },
6184         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6185           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6186           0, 0
6187         },
6188         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6189           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6190           0, 0
6191         },
6192         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6193           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6194           0, 0
6195         },
6196         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6197           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6198           0, 0
6199         },
6200         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6201           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6202           0, 0
6203         },
6204         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6205           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6206           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6207         },
6208         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6209           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6210           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6211         },
6212         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6213           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6214           0, 0
6215         },
6216         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6217           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6218           0, 0
6219         },
6220         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6221           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6222           0, 0
6223         },
6224         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6225           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6226           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6227         },
6228         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6229           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6230           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6231         },
6232         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6233           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6234           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6235         },
6236         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6237           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6238           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6239         },
6240         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6241           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6242           0, 0
6243         },
6244         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6245           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6246           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6247         },
6248         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6249           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6250           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6251         },
6252         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6253           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6254           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6255         },
6256         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6257           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6258           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6259         },
6260         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6261           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6262           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6263         },
6264         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6265           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6266           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6267         },
6268         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6269           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6270           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6271         },
6272         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6273           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6274           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6275         },
6276         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6277           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6278           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6279         },
6280         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6281           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6282           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6283         },
6284         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6285           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6286           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6287         },
6288         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6289           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6290           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6291         },
6292         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6293           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6294           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6295         },
6296         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6297           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6298           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6299         },
6300         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6301           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6302           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6303         },
6304         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6305           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6306           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6307         },
6308         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6309           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6310           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6311         },
6312         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6313           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6314           0, 0
6315         },
6316         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6317           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6318           0, 0
6319         },
6320         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6321           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6322           0, 0
6323         },
6324         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6325           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6326           0, 0
6327         },
6328         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6329           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6330           0, 0
6331         },
6332         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6333           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6334           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6335         },
6336         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6337           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6338           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6339         },
6340         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6341           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6342           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6343         },
6344         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6345           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6346           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6347         },
6348         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6349           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6350           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6351         },
6352         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6353           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6354           0, 0
6355         },
6356         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6357           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6358           0, 0
6359         },
6360         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6361           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6362           0, 0
6363         },
6364         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6365           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6366           0, 0
6367         },
6368         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6369           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6370           0, 0
6371         },
6372         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6373           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6374           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6375         },
6376         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6377           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6378           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6379         },
6380         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6381           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6382           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6383         },
6384         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6385           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6386           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6387         },
6388         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6389           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6390           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6391         },
6392         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6393           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6394           0, 0
6395         },
6396         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6397           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6398           0, 0
6399         },
6400         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6401           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6402           0, 0
6403         },
6404         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6405           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6406           0, 0
6407         },
6408         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6409           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6410           0, 0
6411         },
6412         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6413           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6414           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6415         },
6416         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6417           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6418           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6419         },
6420         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6421           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6422           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6423         },
6424         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6425           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6426           0, 0
6427         },
6428         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6429           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6430           0, 0
6431         },
6432         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6433           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6434           0, 0
6435         },
6436         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6437           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6438           0, 0
6439         },
6440         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6441           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6442           0, 0
6443         },
6444         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6445           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6446           0, 0
6447         }
6448 };
6449
6450 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6451                                      void *inject_if)
6452 {
6453         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6454         int ret;
6455         struct ta_ras_trigger_error_input block_info = { 0 };
6456
6457         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6458                 return -EINVAL;
6459
6460         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6461                 return -EINVAL;
6462
6463         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6464                 return -EPERM;
6465
6466         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6467               info->head.type)) {
6468                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6469                         ras_gfx_subblocks[info->head.sub_block_index].name,
6470                         info->head.type);
6471                 return -EPERM;
6472         }
6473
6474         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6475               info->head.type)) {
6476                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6477                         ras_gfx_subblocks[info->head.sub_block_index].name,
6478                         info->head.type);
6479                 return -EPERM;
6480         }
6481
6482         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6483         block_info.sub_block_index =
6484                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6485         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6486         block_info.address = info->address;
6487         block_info.value = info->value;
6488
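        /* The actual injection is performed by the PSP RAS TA; hold
         * grbm_idx_mutex so the TA does not race with other code that
         * reprograms GRBM_GFX_INDEX.
         */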
6489         mutex_lock(&adev->grbm_idx_mutex);
6490         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6491         mutex_unlock(&adev->grbm_idx_mutex);
6492
6493         return ret;
6494 }
6495
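/* Memory instance names, indexed via the *_ECC_INDEX / *_EDC_INDEX select
 * registers in the UTC query and reset paths below.
 */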
6496 static const char *vml2_mems[] = {
6497         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6498         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6499         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6500         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6501         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6502         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6503         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6504         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6505         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6506         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6507         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6508         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6509         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6510         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6511         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6512         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6513 };
6514
6515 static const char *vml2_walker_mems[] = {
6516         "UTC_VML2_CACHE_PDE0_MEM0",
6517         "UTC_VML2_CACHE_PDE0_MEM1",
6518         "UTC_VML2_CACHE_PDE1_MEM0",
6519         "UTC_VML2_CACHE_PDE1_MEM1",
6520         "UTC_VML2_CACHE_PDE2_MEM0",
6521         "UTC_VML2_CACHE_PDE2_MEM1",
6522         "UTC_VML2_RDIF_LOG_FIFO",
6523 };
6524
6525 static const char *atc_l2_cache_2m_mems[] = {
6526         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6527         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6528         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6529         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6530 };
6531
6532 static const char *atc_l2_cache_4k_mems[] = {
6533         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6534         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6535         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6536         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6537         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6538         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6539         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6540         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6541         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6542         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6543         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6544         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6545         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6546         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6547         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6548         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6549         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6550         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6551         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6552         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6553         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6554         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6555         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6556         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6557         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6558         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6559         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6560         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6561         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6562         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6563         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6564         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6565 };
6566
6567 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6568                                          struct ras_err_data *err_data)
6569 {
6570         uint32_t i, data;
6571         uint32_t sec_count, ded_count;
6572
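        /* zero the UTC EDC counters before sampling them per instance */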
6573         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6574         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6575         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6576         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6577         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6578         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6579         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6580         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6581
6582         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6583                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6584                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6585
6586                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6587                 if (sec_count) {
6588                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6589                                 "SEC %d\n", i, vml2_mems[i], sec_count);
6590                         err_data->ce_count += sec_count;
6591                 }
6592
6593                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6594                 if (ded_count) {
6595                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6596                                 "DED %d\n", i, vml2_mems[i], ded_count);
6597                         err_data->ue_count += ded_count;
6598                 }
6599         }
6600
6601         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6602                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6603                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6604
6605                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6606                                                 SEC_COUNT);
6607                 if (sec_count) {
6608                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6609                                 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6610                         err_data->ce_count += sec_count;
6611                 }
6612
6613                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6614                                                 DED_COUNT);
6615                 if (ded_count) {
6616                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6617                                 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6618                         err_data->ue_count += ded_count;
6619                 }
6620         }
6621
6622         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6623                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6624                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6625
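                /* SEC count lives in bits [14:13] of the 2M EDC_CNT register */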
6626                 sec_count = (data & 0x00006000L) >> 0xd;
6627                 if (sec_count) {
6628                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6629                                 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6630                                 sec_count);
6631                         err_data->ce_count += sec_count;
6632                 }
6633         }
6634
6635         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6636                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6637                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6638
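                /* for the 4K cache, SEC is in bits [14:13] and DED in bits [16:15] */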
6639                 sec_count = (data & 0x00006000L) >> 0xd;
6640                 if (sec_count) {
6641                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6642                                 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6643                                 sec_count);
6644                         err_data->ce_count += sec_count;
6645                 }
6646
6647                 ded_count = (data & 0x00018000L) >> 0xf;
6648                 if (ded_count) {
6649                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6650                                 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6651                                 ded_count);
6652                         err_data->ue_count += ded_count;
6653                 }
6654         }
6655
6656         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6657         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6658         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6659         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6660
6661         return 0;
6662 }
6663
6664 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6665         const struct soc15_reg_entry *reg,
6666         uint32_t se_id, uint32_t inst_id, uint32_t value,
6667         uint32_t *sec_count, uint32_t *ded_count)
6668 {
6669         uint32_t i;
6670         uint32_t sec_cnt, ded_cnt;
6671
6672         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6673                 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6674                         gfx_v9_0_ras_fields[i].seg != reg->seg ||
6675                         gfx_v9_0_ras_fields[i].inst != reg->inst)
6676                         continue;
6677
6678                 sec_cnt = (value &
6679                                 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6680                                 gfx_v9_0_ras_fields[i].sec_count_shift;
6681                 if (sec_cnt) {
6682                         dev_info(adev->dev, "GFX SubBlock %s, "
6683                                 "Instance[%d][%d], SEC %d\n",
6684                                 gfx_v9_0_ras_fields[i].name,
6685                                 se_id, inst_id,
6686                                 sec_cnt);
6687                         *sec_count += sec_cnt;
6688                 }
6689
6690                 ded_cnt = (value &
6691                                 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6692                                 gfx_v9_0_ras_fields[i].ded_count_shift;
6693                 if (ded_cnt) {
6694                         dev_info(adev->dev, "GFX SubBlock %s, "
6695                                 "Instance[%d][%d], DED %d\n",
6696                                 gfx_v9_0_ras_fields[i].name,
6697                                 se_id, inst_id,
6698                                 ded_cnt);
6699                         *ded_count += ded_cnt;
6700                 }
6701         }
6702
6703         return 0;
6704 }
6705
6706 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6707 {
6708         int i, j, k;
6709
6710         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6711                 return;
6712
6713         /* read back registers to clear the counters */
6714         mutex_lock(&adev->grbm_idx_mutex);
6715         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6716                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6717                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6718                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6719                                 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6720                         }
6721                 }
6722         }
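        /* restore broadcast mode: 0xe0000000 sets the SE, SH and instance
         * broadcast bits of GRBM_GFX_INDEX
         */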
6723         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6724         mutex_unlock(&adev->grbm_idx_mutex);
6725
6726         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6727         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6728         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6729         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6730         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6731         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6732         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6733         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6734
6735         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6736                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6737                 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6738         }
6739
6740         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6741                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6742                 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6743         }
6744
6745         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6746                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6747                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6748         }
6749
6750         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6751                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6752                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6753         }
6754
6755         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6756         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6757         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6758         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6759 }
6760
6761 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6762                                           void *ras_error_status)
6763 {
6764         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6765         uint32_t sec_count = 0, ded_count = 0;
6766         uint32_t i, j, k;
6767         uint32_t reg_value;
6768
6769         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6770                 return -EINVAL;
6771
6772         err_data->ue_count = 0;
6773         err_data->ce_count = 0;
6774
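        /* walk every EDC counter register for each SE/instance pair; the
         * read-out both reports and clears the hardware counters
         */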
6775         mutex_lock(&adev->grbm_idx_mutex);
6776
6777         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6778                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6779                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6780                                 gfx_v9_0_select_se_sh(adev, j, 0, k);
6781                                 reg_value =
6782                                         RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6783                                 if (reg_value)
6784                                         gfx_v9_0_ras_error_count(adev,
6785                                                 &gfx_v9_0_edc_counter_regs[i],
6786                                                 j, k, reg_value,
6787                                                 &sec_count, &ded_count);
6788                         }
6789                 }
6790         }
6791
6792         err_data->ce_count += sec_count;
6793         err_data->ue_count += ded_count;
6794
6795         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6796         mutex_unlock(&adev->grbm_idx_mutex);
6797
6798         gfx_v9_0_query_utc_edc_status(adev, err_data);
6799
6800         return 0;
6801 }
6802
6803 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6804 {
6805         const unsigned int cp_coher_cntl =
6806                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6807                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6808                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6809                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6810                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6811
6812         /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6813         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6814         amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6815         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6816         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6817         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6818         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6819         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6820 }
6821
6822 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6823                                         uint32_t pipe, bool enable)
6824 {
6825         struct amdgpu_device *adev = ring->adev;
6826         uint32_t val;
6827         uint32_t wcl_cs_reg;
6828
6829         /* the mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all the same */
6830         val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6831
6832         switch (pipe) {
6833         case 0:
6834                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6835                 break;
6836         case 1:
6837                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6838                 break;
6839         case 2:
6840                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6841                 break;
6842         case 3:
6843                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6844                 break;
6845         default:
6846                 DRM_DEBUG("invalid pipe %d\n", pipe);
6847                 return;
6848         }
6849
6850         amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6851 }
6852
6853 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6854 {
6855         struct amdgpu_device *adev = ring->adev;
6856         uint32_t val;
6857         int i;
6858
6859
6860         /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
6861          * limit the number of gfx waves. Setting the low 5 bits (0x1f) makes
6862          * sure gfx only gets around 25% of the gpu resources.
6863          */
6864         val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6865         amdgpu_ring_emit_wreg(ring,
6866                               SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6867                               val);
6868
6869         /* Restrict waves for normal/low priority compute queues as well
6870          * to get best QoS for high priority compute jobs.
6871          *
6872          * amdgpu controls only the 1st ME (CS pipes 0-3).
6873          */
6874         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6875                 if (i != ring->pipe)
6876                         gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6877
6878         }
6879 }
6880
6881 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6882         .name = "gfx_v9_0",
6883         .early_init = gfx_v9_0_early_init,
6884         .late_init = gfx_v9_0_late_init,
6885         .sw_init = gfx_v9_0_sw_init,
6886         .sw_fini = gfx_v9_0_sw_fini,
6887         .hw_init = gfx_v9_0_hw_init,
6888         .hw_fini = gfx_v9_0_hw_fini,
6889         .suspend = gfx_v9_0_suspend,
6890         .resume = gfx_v9_0_resume,
6891         .is_idle = gfx_v9_0_is_idle,
6892         .wait_for_idle = gfx_v9_0_wait_for_idle,
6893         .soft_reset = gfx_v9_0_soft_reset,
6894         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6895         .set_powergating_state = gfx_v9_0_set_powergating_state,
6896         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6897 };
6898
6899 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6900         .type = AMDGPU_RING_TYPE_GFX,
6901         .align_mask = 0xff,
6902         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6903         .support_64bit_ptrs = true,
6904         .vmhub = AMDGPU_GFXHUB_0,
6905         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6906         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6907         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6908         .emit_frame_size = /* 242 dwords maximum in total if 16 IBs */
6909                 5 +  /* COND_EXEC */
6910                 7 +  /* PIPELINE_SYNC */
6911                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6912                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6913                 2 + /* VM_FLUSH */
6914                 8 +  /* FENCE for VM_FLUSH */
6915                 20 + /* GDS switch */
6916                 4 + /* double SWITCH_BUFFER,
6917                      * the first COND_EXEC jumps to the place just
6918                      * prior to this double SWITCH_BUFFER */
6919                 5 + /* COND_EXEC */
6920                 7 + /* HDP_flush */
6921                 4 + /* VGT_flush */
6922                 14 + /* CE_META */
6923                 31 + /* DE_META */
6924                 3 + /* CNTX_CTRL */
6925                 5 + /* HDP_INVL */
6926                 8 + 8 + /* FENCE x2 */
6927                 2 + /* SWITCH_BUFFER */
6928                 7, /* gfx_v9_0_emit_mem_sync */
6929         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6930         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6931         .emit_fence = gfx_v9_0_ring_emit_fence,
6932         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6933         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6934         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6935         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6936         .test_ring = gfx_v9_0_ring_test_ring,
6937         .test_ib = gfx_v9_0_ring_test_ib,
6938         .insert_nop = amdgpu_ring_insert_nop,
6939         .pad_ib = amdgpu_ring_generic_pad_ib,
6940         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6941         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6942         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6943         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6944         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6945         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6946         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6947         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6948         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6949         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6950 };
6951
6952 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6953         .type = AMDGPU_RING_TYPE_COMPUTE,
6954         .align_mask = 0xff,
6955         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6956         .support_64bit_ptrs = true,
6957         .vmhub = AMDGPU_GFXHUB_0,
6958         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6959         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6960         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6961         .emit_frame_size =
6962                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6963                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6964                 5 + /* hdp invalidate */
6965                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6966                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6967                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6968                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6969                 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6970                 7 + /* gfx_v9_0_emit_mem_sync */
6971                 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6972                 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6973         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6974         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6975         .emit_fence = gfx_v9_0_ring_emit_fence,
6976         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6977         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6978         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6979         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6980         .test_ring = gfx_v9_0_ring_test_ring,
6981         .test_ib = gfx_v9_0_ring_test_ib,
6982         .insert_nop = amdgpu_ring_insert_nop,
6983         .pad_ib = amdgpu_ring_generic_pad_ib,
6984         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6985         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6986         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6987         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6988         .emit_wave_limit = gfx_v9_0_emit_wave_limit,
6989 };
6990
6991 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6992         .type = AMDGPU_RING_TYPE_KIQ,
6993         .align_mask = 0xff,
6994         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6995         .support_64bit_ptrs = true,
6996         .vmhub = AMDGPU_GFXHUB_0,
6997         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6998         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6999         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7000         .emit_frame_size =
7001                 20 + /* gfx_v9_0_ring_emit_gds_switch */
7002                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7003                 5 + /* hdp invalidate */
7004                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7005                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7006                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7007                 2 + /* gfx_v9_0_ring_emit_vm_flush */
7008                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7009         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7010         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7011         .test_ring = gfx_v9_0_ring_test_ring,
7012         .insert_nop = amdgpu_ring_insert_nop,
7013         .pad_ib = amdgpu_ring_generic_pad_ib,
7014         .emit_rreg = gfx_v9_0_ring_emit_rreg,
7015         .emit_wreg = gfx_v9_0_ring_emit_wreg,
7016         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7017         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7018 };
7019
7020 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7021 {
7022         int i;
7023
7024         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7025
7026         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7027                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7028
7029         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7030                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7031 }
7032
7033 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7034         .set = gfx_v9_0_set_eop_interrupt_state,
7035         .process = gfx_v9_0_eop_irq,
7036 };
7037
7038 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7039         .set = gfx_v9_0_set_priv_reg_fault_state,
7040         .process = gfx_v9_0_priv_reg_irq,
7041 };
7042
7043 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7044         .set = gfx_v9_0_set_priv_inst_fault_state,
7045         .process = gfx_v9_0_priv_inst_irq,
7046 };
7047
7048 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7049         .set = gfx_v9_0_set_cp_ecc_error_state,
7050         .process = amdgpu_gfx_cp_ecc_error_irq,
7051 };
7052
7053
7054 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7055 {
7056         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7057         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7058
7059         adev->gfx.priv_reg_irq.num_types = 1;
7060         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7061
7062         adev->gfx.priv_inst_irq.num_types = 1;
7063         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7064
7065         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7066         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7067 }
7068
7069 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7070 {
7071         switch (adev->ip_versions[GC_HWIP][0]) {
7072         case IP_VERSION(9, 0, 1):
7073         case IP_VERSION(9, 2, 1):
7074         case IP_VERSION(9, 4, 0):
7075         case IP_VERSION(9, 2, 2):
7076         case IP_VERSION(9, 1, 0):
7077         case IP_VERSION(9, 4, 1):
7078         case IP_VERSION(9, 3, 0):
7079         case IP_VERSION(9, 4, 2):
7080                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7081                 break;
7082         default:
7083                 break;
7084         }
7085 }
7086
7087 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7088 {
7089         /* init asic gds info */
7090         switch (adev->ip_versions[GC_HWIP][0]) {
7091         case IP_VERSION(9, 0, 1):
7092         case IP_VERSION(9, 2, 1):
7093         case IP_VERSION(9, 4, 0):
7094                 adev->gds.gds_size = 0x10000;
7095                 break;
7096         case IP_VERSION(9, 2, 2):
7097         case IP_VERSION(9, 1, 0):
7098         case IP_VERSION(9, 4, 1):
7099                 adev->gds.gds_size = 0x1000;
7100                 break;
7101         case IP_VERSION(9, 4, 2):
7102                 /* Aldebaran removed all the GDS internal memory;
7103                  * only GWS opcodes such as barrier and semaphore
7104                  * are supported in the kernel */
7105                 adev->gds.gds_size = 0;
7106                 break;
7107         default:
7108                 adev->gds.gds_size = 0x10000;
7109                 break;
7110         }
7111
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 4, 0):
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case IP_VERSION(9, 2, 1):
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case IP_VERSION(9, 4, 1):
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	case IP_VERSION(9, 4, 2):
		/* deprecated for Aldebaran, no usage at all */
		adev->gds.gds_compute_max_wave_id = 0;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

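/*
 * Program the user-requested inactive-CU mask for the currently selected
 * SE/SH; the caller picks the instance via gfx_v9_0_select_se_sh().
 */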
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

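/*
 * A CU is active when it is disabled neither by fuses
 * (CC_GC_SHADER_ARRAY_CONFIG) nor by the user
 * (GC_USER_SHADER_ARRAY_CONFIG): OR the two inactive masks,
 * invert, and clamp to max_cu_per_sh bits.
 */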
static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)
		return -EINVAL;

	/*
	 * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs.
	 */
	if (adev->gfx.config.max_shader_engines *
		adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info
			 * structure is a 4x4 array, which suits the Vega
			 * ASICs with their 4*2 SE/SH layout.
			 * Arcturus, however, uses an 8*1 SE/SH layout.
			 * To minimize the impact, remap it onto the
			 * existing 4x4 array as follows:
			 *    SE4,SH0 --> bitmap[0][1]
			 *    SE5,SH0 --> bitmap[1][1]
			 *    SE6,SH0 --> bitmap[2][1]
			 *    SE7,SH0 --> bitmap[3][1]
			 */
			cu_info->bitmap[i % 4][j + i / 4] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
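			/*
			 * Pack the always-on (ao) CU bits of the first two
			 * SEs/SHs into a 32-bit mask: 16 bits per SE,
			 * 8 bits per SH.
			 */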
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

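/*
 * IP block entry for GFX v9.0; the soc15 setup code adds this to the
 * device's IP block list via amdgpu_device_ip_block_add().
 */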
const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};