drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
42
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48
49 #include "amdgpu_ras.h"
50
51 #include "gfx_v9_4.h"
52
53 #define GFX9_NUM_GFX_RINGS     1
54 #define GFX9_MEC_HPD_SIZE 4096
55 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
56 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
57
58 #define mmPWR_MISC_CNTL_STATUS                                  0x0183
59 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
62 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
63 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L
64
65 #define mmGCEA_PROBE_MAP                        0x070c
66 #define mmGCEA_PROBE_MAP_BASE_IDX               0
67
68 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
74
75 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
81
82 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
88
89 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
90 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
91 MODULE_FIRMWARE("amdgpu/raven_me.bin");
92 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
94 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
95
96 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
103
104 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
110 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
111
112 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
113 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
114 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
115
116 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
120 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
121 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
122
123 #define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
124 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
125 #define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
126 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
127 #define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
128 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
129 #define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
130 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
131 #define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
132 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
133 #define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
134 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0
135
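/*
 * GFX RAS sub-block indices as understood by the RAS TA (trusted
 * application).  The *_INDEX_START / *_INDEX_END markers bracket groups of
 * related sub-blocks so a block can be addressed individually or as a range.
 */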
136 enum ta_ras_gfx_subblock {
137         /*CPC*/
138         TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
139         TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
140         TA_RAS_BLOCK__GFX_CPC_UCODE,
141         TA_RAS_BLOCK__GFX_DC_STATE_ME1,
142         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
143         TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
144         TA_RAS_BLOCK__GFX_DC_STATE_ME2,
145         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
146         TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
147         TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
148         /* CPF*/
149         TA_RAS_BLOCK__GFX_CPF_INDEX_START,
150         TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
151         TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
152         TA_RAS_BLOCK__GFX_CPF_TAG,
153         TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
154         /* CPG*/
155         TA_RAS_BLOCK__GFX_CPG_INDEX_START,
156         TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
157         TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
158         TA_RAS_BLOCK__GFX_CPG_TAG,
159         TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
160         /* GDS*/
161         TA_RAS_BLOCK__GFX_GDS_INDEX_START,
162         TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
163         TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
164         TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
165         TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
166         TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
167         TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
168         /* SPI*/
169         TA_RAS_BLOCK__GFX_SPI_SR_MEM,
170         /* SQ*/
171         TA_RAS_BLOCK__GFX_SQ_INDEX_START,
172         TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
173         TA_RAS_BLOCK__GFX_SQ_LDS_D,
174         TA_RAS_BLOCK__GFX_SQ_LDS_I,
175         TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
176         TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
177         /* SQC (3 ranges)*/
178         TA_RAS_BLOCK__GFX_SQC_INDEX_START,
179         /* SQC range 0*/
180         TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
181         TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
182                 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
183         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
184         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
185         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
186         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
187         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
188         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
189         TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
190                 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
191         /* SQC range 1*/
192         TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
193         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
194                 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
195         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
196         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
197         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
198         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
199         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
200         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
201         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
202         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
203         TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
204                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
205         /* SQC range 2*/
206         TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
207         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
208                 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
209         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
210         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
211         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
212         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
213         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
214         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
215         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
216         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
217         TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
218                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
219         TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
220         /* TA*/
221         TA_RAS_BLOCK__GFX_TA_INDEX_START,
222         TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
223         TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
224         TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
225         TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
226         TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
227         TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
228         /* TCA*/
229         TA_RAS_BLOCK__GFX_TCA_INDEX_START,
230         TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
231         TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
232         TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
233         /* TCC (5 sub-ranges)*/
234         TA_RAS_BLOCK__GFX_TCC_INDEX_START,
235         /* TCC range 0*/
236         TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
237         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
238         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
239         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
240         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
241         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
242         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
243         TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
244         TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
245         TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
246         /* TCC range 1*/
247         TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
248         TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
249         TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
250         TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
251                 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
252         /* TCC range 2*/
253         TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
254         TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
255         TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
256         TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
257         TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
258         TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
259         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
260         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
261         TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
262         TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
263                 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
264         /* TCC range 3*/
265         TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
266         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
267         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
268         TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
269                 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
270         /* TCC range 4*/
271         TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
272         TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
273                 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
274         TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
275         TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
276                 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
277         TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
278         /* TCI*/
279         TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
280         /* TCP*/
281         TA_RAS_BLOCK__GFX_TCP_INDEX_START,
282         TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
283         TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
284         TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
285         TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
286         TA_RAS_BLOCK__GFX_TCP_DB_RAM,
287         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
288         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
289         TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
290         /* TD*/
291         TA_RAS_BLOCK__GFX_TD_INDEX_START,
292         TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
293         TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
294         TA_RAS_BLOCK__GFX_TD_CS_FIFO,
295         TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
296         /* EA (3 sub-ranges)*/
297         TA_RAS_BLOCK__GFX_EA_INDEX_START,
298         /* EA range 0*/
299         TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
300         TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
301         TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
302         TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
303         TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
304         TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
305         TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
306         TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
307         TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
308         TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
309         /* EA range 1*/
310         TA_RAS_BLOCK__GFX_EA_INDEX1_START,
311         TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
312         TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
313         TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
314         TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
315         TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
316         TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
317         TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
318         TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
319         /* EA range 2*/
320         TA_RAS_BLOCK__GFX_EA_INDEX2_START,
321         TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
322         TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
323         TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
324         TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
325         TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
326         TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
327         /* UTC VM L2 bank*/
328         TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
329         /* UTC VM walker*/
330         TA_RAS_BLOCK__UTC_VML2_WALKER,
331         /* UTC ATC L2 2MB cache*/
332         TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
333         /* UTC ATC L2 4KB cache*/
334         TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
335         TA_RAS_BLOCK__GFX_MAX
336 };
337
338 struct ras_gfx_subblock {
339         unsigned char *name;
340         int ta_subblock;
341         int hw_supported_error_type;
342         int sw_supported_error_type;
343 };
344
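/*
 * Pack one ras_gfx_subblock entry: the sub-block name, its RAS TA index,
 * and two bitmasks built from the flag arguments -- a..d form
 * hw_supported_error_type and e..h form sw_supported_error_type (note the
 * non-sequential shift order used for the software flags below).
 */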
345 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
346         [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
347                 #subblock,                                                     \
348                 TA_RAS_BLOCK__##subblock,                                      \
349                 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
350                 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
351         }
352
353 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
354         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
355         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
356         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
357         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
358         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
359         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
360         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
361         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
362         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
363         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
364         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
365         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
366         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
367         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
368         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
369         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
370         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
371                              0),
372         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
373                              0),
374         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
375         AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
376         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
377         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
378         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
379         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
380         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
381         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
382                              0, 0),
383         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
384                              0),
385         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
386                              0, 0),
387         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
388                              0),
389         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
390                              0, 0),
391         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
392                              0),
393         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
394                              1),
395         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
396                              0, 0, 0),
397         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
398                              0),
399         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
400                              0),
401         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
402                              0),
403         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
404                              0),
405         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
406                              0),
407         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
408                              0, 0),
409         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
410                              0),
411         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
412                              0),
413         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
414                              0, 0, 0),
415         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
416                              0),
417         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
418                              0),
419         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
420                              0),
421         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
422                              0),
423         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
424                              0),
425         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
426                              0, 0),
427         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
428                              0),
429         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
430         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
431         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
432         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
433         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
434         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
435         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
436         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
437         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
438                              1),
439         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
440                              1),
441         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
442                              1),
443         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
444                              0),
445         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
446                              0),
447         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
448         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
449         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
450         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
451         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
452         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
453         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
454         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
455         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
456         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
457         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
458         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
459                              0),
460         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
461         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
462                              0),
463         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
464                              0, 0),
465         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
466                              0),
467         AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
468         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
469         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
470         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
471         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
472         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
473         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
474         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
475         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
476         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
477         AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
478         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
479         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
480         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
481         AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
482         AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
483         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
484         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
485         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
486         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
487         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
488         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
489         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
490         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
491         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
492         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
493         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
494         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
495         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
496         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
497         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
498         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
499         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
500         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
501 };
502
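/*
 * Golden register settings.  Each SOC15_REG_GOLDEN_VALUE() entry names an
 * IP block, instance and register together with an AND mask (the bits to
 * update) and an OR value; soc15_program_register_sequence() applies them
 * on top of the hardware defaults from gfx_v9_0_init_golden_registers().
 */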
503 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
504 {
505         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
506         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
507         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
508         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
509         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
510         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
511         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
512         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
513         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
514         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
515         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
516         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
517         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
518         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
519         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
520         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
521         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
522         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
523         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
524         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
525 };
526
527 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
528 {
529         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
530         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
531         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
532         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
533         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
534         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
535         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
536         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
537         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
538         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
539         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
540         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
541         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
542         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
543         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
544         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
545         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
546         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
547 };
548
549 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
550 {
551         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
552         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
553         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
554         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
555         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
556         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
557         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
558         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
559         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
560         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
561         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
562 };
563
564 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
565 {
566         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
567         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
568         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
569         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
570         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
571         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
572         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
573         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
574         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
575         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
576         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
577         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
578         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
579         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
580         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
581         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
582         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
583         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
584         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
585         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
586         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
587         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
588         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
589         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
590 };
591
592 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
593 {
594         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
595         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
596         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
597         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
598         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
599         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
600         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
601 };
602
603 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
604 {
605         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
606         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
607         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
608         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
609         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
610         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
611         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
612         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
613         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
614         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
615         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
616         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
617         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
618         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
619         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
620         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
621         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
622         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
623         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
624 };
625
626 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
627 {
628         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
629         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
630         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
631         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
632         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
633         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
634         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
635         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
636         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
637         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
638         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
639         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
640 };
641
642 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
643 {
644         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
645         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
646         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
647 };
648
649 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
650 {
651         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
652         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
653         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
654         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
655         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
656         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
657         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
658         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
659         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
660         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
661         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
662         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
663         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
664         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
665         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
666         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
667 };
668
669 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
670 {
671         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
672         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
673         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
674         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
675         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
676         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
677         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
678         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
679         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
680         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
681         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
682         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
683         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
684 };
685
686 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
687 {
688         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
689         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
690         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
691         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
692         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
693         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
694         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
695         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
696         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
697         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
698 };
699
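/*
 * Offsets of the RLC SRM index control address/data register pairs relative
 * to instance 0, so the pairs can be addressed by loop index.
 */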
700 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
701 {
702         mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
703         mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
704         mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
705         mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
706         mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
707         mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
708         mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
709         mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
710 };
711
712 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
713 {
714         mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
715         mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
716         mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
717         mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
718         mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
719         mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
720         mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
721         mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
722 };
723
724 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
725 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
726 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
727 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
728
729 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
730 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
731 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
732 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
733 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
734                                  struct amdgpu_cu_info *cu_info);
735 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
736 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
737 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
738 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
739 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
740                                           void *ras_error_status);
741 static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev);
742 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
743                                      void *inject_if);
744
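/*
 * KIQ (kernel interface queue) packet builders.  The driver submits these
 * PM4 packets on the KIQ ring to hand compute-queue management -- mapping,
 * unmapping, status queries and TLB invalidation -- to the CP firmware.
 */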
745 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
746                                 uint64_t queue_mask)
747 {
748         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
749         amdgpu_ring_write(kiq_ring,
750                 PACKET3_SET_RESOURCES_VMID_MASK(0) |
751                 /* vmid_mask:0 queue_type:0 (KIQ) */
752                 PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
753         amdgpu_ring_write(kiq_ring,
754                         lower_32_bits(queue_mask));     /* queue mask lo */
755         amdgpu_ring_write(kiq_ring,
756                         upper_32_bits(queue_mask));     /* queue mask hi */
757         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
758         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
759         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
760         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
761 }
762
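/*
 * Emit a MAP_QUEUES packet that registers @ring's MQD and write-pointer
 * poll address with the CP scheduler.
 */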
763 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
764                                  struct amdgpu_ring *ring)
765 {
766         struct amdgpu_device *adev = kiq_ring->adev;
767         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
768         uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
769         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
770
771         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
772         /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
773         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
774                          PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
775                          PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
776                          PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
777                          PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
778                          PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
779                          /*queue_type: normal compute queue */
780                          PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
781                          /* alloc format: all_on_one_pipe */
782                          PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
783                          PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
784                          /* num_queues: must be 1 */
785                          PACKET3_MAP_QUEUES_NUM_QUEUES(1));
786         amdgpu_ring_write(kiq_ring,
787                         PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
788         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
789         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
790         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
791         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
792 }
793
794 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
795                                    struct amdgpu_ring *ring,
796                                    enum amdgpu_unmap_queues_action action,
797                                    u64 gpu_addr, u64 seq)
798 {
799         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
800
801         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
802         amdgpu_ring_write(kiq_ring, /* action, queue_sel, eng_sel, num_queues */
803                           PACKET3_UNMAP_QUEUES_ACTION(action) |
804                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
805                           PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
806                           PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
807         amdgpu_ring_write(kiq_ring,
808                         PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
809
810         if (action == PREEMPT_QUEUES_NO_UNMAP) {
811                 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
812                 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
813                 amdgpu_ring_write(kiq_ring, seq);
814         } else {
815                 amdgpu_ring_write(kiq_ring, 0);
816                 amdgpu_ring_write(kiq_ring, 0);
817                 amdgpu_ring_write(kiq_ring, 0);
818         }
819 }
820
821 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
822                                    struct amdgpu_ring *ring,
823                                    u64 addr,
824                                    u64 seq)
825 {
826         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
827
828         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
829         amdgpu_ring_write(kiq_ring,
830                           PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
831                           PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
832                           PACKET3_QUERY_STATUS_COMMAND(2));
833         /* doorbell offset and engine select */
834         amdgpu_ring_write(kiq_ring,
835                         PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
836                         PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
837         amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
838         amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
839         amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
840         amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
841 }
842
843 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
844                                 uint16_t pasid, uint32_t flush_type,
845                                 bool all_hub)
846 {
847         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
848         amdgpu_ring_write(kiq_ring,
849                         PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
850                         PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
851                         PACKET3_INVALIDATE_TLBS_PASID(pasid) |
852                         PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
853 }
854
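/*
 * The *_size fields give each packet's length in dwords and are used to
 * reserve KIQ ring space before the corresponding packet is emitted.
 */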
855 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
856         .kiq_set_resources = gfx_v9_0_kiq_set_resources,
857         .kiq_map_queues = gfx_v9_0_kiq_map_queues,
858         .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
859         .kiq_query_status = gfx_v9_0_kiq_query_status,
860         .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
861         .set_resources_size = 8,
862         .map_queues_size = 7,
863         .unmap_queues_size = 6,
864         .query_status_size = 7,
865         .invalidate_tlbs_size = 2,
866 };
867
868 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
869 {
870         adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
871 }
872
873 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
874 {
875         switch (adev->asic_type) {
876         case CHIP_VEGA10:
877                 soc15_program_register_sequence(adev,
878                                                 golden_settings_gc_9_0,
879                                                 ARRAY_SIZE(golden_settings_gc_9_0));
880                 soc15_program_register_sequence(adev,
881                                                 golden_settings_gc_9_0_vg10,
882                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
883                 break;
884         case CHIP_VEGA12:
885                 soc15_program_register_sequence(adev,
886                                                 golden_settings_gc_9_2_1,
887                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
888                 soc15_program_register_sequence(adev,
889                                                 golden_settings_gc_9_2_1_vg12,
890                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
891                 break;
892         case CHIP_VEGA20:
893                 soc15_program_register_sequence(adev,
894                                                 golden_settings_gc_9_0,
895                                                 ARRAY_SIZE(golden_settings_gc_9_0));
896                 soc15_program_register_sequence(adev,
897                                                 golden_settings_gc_9_0_vg20,
898                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
899                 break;
900         case CHIP_ARCTURUS:
901                 soc15_program_register_sequence(adev,
902                                                 golden_settings_gc_9_4_1_arct,
903                                                 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
904                 break;
905         case CHIP_RAVEN:
906                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
907                                                 ARRAY_SIZE(golden_settings_gc_9_1));
908                 if (adev->rev_id >= 8)
909                         soc15_program_register_sequence(adev,
910                                                         golden_settings_gc_9_1_rv2,
911                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
912                 else
913                         soc15_program_register_sequence(adev,
914                                                         golden_settings_gc_9_1_rv1,
915                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
916                 break;
917         case CHIP_RENOIR:
918                 soc15_program_register_sequence(adev,
919                                                 golden_settings_gc_9_1_rn,
920                                                 ARRAY_SIZE(golden_settings_gc_9_1_rn));
921                 return; /* Renoir does not need the common golden settings */
922         default:
923                 break;
924         }
925
926         if (adev->asic_type != CHIP_ARCTURUS)
927                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
928                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
929 }
930
931 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
932 {
933         adev->gfx.scratch.num_reg = 8;
934         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
935         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
936 }
937
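/*
 * Emit a WRITE_DATA packet that writes @val to register @reg from the
 * selected engine, optionally requesting write confirmation.
 */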
938 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
939                                        bool wc, uint32_t reg, uint32_t val)
940 {
941         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
942         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
943                                 WRITE_DATA_DST_SEL(0) |
944                                 (wc ? WR_CONFIRM : 0));
945         amdgpu_ring_write(ring, reg);
946         amdgpu_ring_write(ring, 0);
947         amdgpu_ring_write(ring, val);
948 }
949
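/*
 * Emit a WAIT_REG_MEM packet that polls a register (mem_space = 0) or a
 * memory location (mem_space = 1) until (value & mask) == ref.
 */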
950 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
951                                   int mem_space, int opt, uint32_t addr0,
952                                   uint32_t addr1, uint32_t ref, uint32_t mask,
953                                   uint32_t inv)
954 {
955         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
956         amdgpu_ring_write(ring,
957                                  /* memory (1) or register (0) */
958                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
959                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
960                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
961                                  WAIT_REG_MEM_ENGINE(eng_sel)));
962
963         if (mem_space)
964                 BUG_ON(addr0 & 0x3); /* Dword align */
965         amdgpu_ring_write(ring, addr0);
966         amdgpu_ring_write(ring, addr1);
967         amdgpu_ring_write(ring, ref);
968         amdgpu_ring_write(ring, mask);
969         amdgpu_ring_write(ring, inv); /* poll interval */
970 }
971
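/*
 * Basic ring test: write a magic value to a scratch register through the
 * ring and poll until it reads back, proving the CP is consuming packets.
 */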
972 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
973 {
974         struct amdgpu_device *adev = ring->adev;
975         uint32_t scratch;
976         uint32_t tmp = 0;
977         unsigned i;
978         int r;
979
980         r = amdgpu_gfx_scratch_get(adev, &scratch);
981         if (r)
982                 return r;
983
984         WREG32(scratch, 0xCAFEDEAD);
985         r = amdgpu_ring_alloc(ring, 3);
986         if (r)
987                 goto error_free_scratch;
988
989         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
990         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
991         amdgpu_ring_write(ring, 0xDEADBEEF);
992         amdgpu_ring_commit(ring);
993
994         for (i = 0; i < adev->usec_timeout; i++) {
995                 tmp = RREG32(scratch);
996                 if (tmp == 0xDEADBEEF)
997                         break;
998                 udelay(1);
999         }
1000
1001         if (i >= adev->usec_timeout)
1002                 r = -ETIMEDOUT;
1003
1004 error_free_scratch:
1005         amdgpu_gfx_scratch_free(adev, scratch);
1006         return r;
1007 }
1008
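/*
 * IB test: submit a small indirect buffer that writes 0xDEADBEEF to a
 * writeback slot, wait on its fence and check that the value landed.
 */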
1009 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1010 {
1011         struct amdgpu_device *adev = ring->adev;
1012         struct amdgpu_ib ib;
1013         struct dma_fence *f = NULL;
1014
1015         unsigned index;
1016         uint64_t gpu_addr;
1017         uint32_t tmp;
1018         long r;
1019
1020         r = amdgpu_device_wb_get(adev, &index);
1021         if (r)
1022                 return r;
1023
1024         gpu_addr = adev->wb.gpu_addr + (index * 4);
1025         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1026         memset(&ib, 0, sizeof(ib));
1027         r = amdgpu_ib_get(adev, NULL, 16, &ib);
1028         if (r)
1029                 goto err1;
1030
1031         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1032         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1033         ib.ptr[2] = lower_32_bits(gpu_addr);
1034         ib.ptr[3] = upper_32_bits(gpu_addr);
1035         ib.ptr[4] = 0xDEADBEEF;
1036         ib.length_dw = 5;
1037
1038         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1039         if (r)
1040                 goto err2;
1041
1042         r = dma_fence_wait_timeout(f, false, timeout);
1043         if (r == 0) {
1044                 r = -ETIMEDOUT;
1045                 goto err2;
1046         } else if (r < 0) {
1047                 goto err2;
1048         }
1049
1050         tmp = adev->wb.wb[index];
1051         if (tmp == 0xDEADBEEF)
1052                 r = 0;
1053         else
1054                 r = -EINVAL;
1055
1056 err2:
1057         amdgpu_ib_free(adev, &ib, NULL);
1058         dma_fence_put(f);
1059 err1:
1060         amdgpu_device_wb_free(adev, index);
1061         return r;
1062 }
1063
1064
1065 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1066 {
1067         release_firmware(adev->gfx.pfp_fw);
1068         adev->gfx.pfp_fw = NULL;
1069         release_firmware(adev->gfx.me_fw);
1070         adev->gfx.me_fw = NULL;
1071         release_firmware(adev->gfx.ce_fw);
1072         adev->gfx.ce_fw = NULL;
1073         release_firmware(adev->gfx.rlc_fw);
1074         adev->gfx.rlc_fw = NULL;
1075         release_firmware(adev->gfx.mec_fw);
1076         adev->gfx.mec_fw = NULL;
1077         release_firmware(adev->gfx.mec2_fw);
1078         adev->gfx.mec2_fw = NULL;
1079
1080         kfree(adev->gfx.rlc.register_list_format);
1081 }
1082
1083 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1084 {
1085         const struct rlc_firmware_header_v2_1 *rlc_hdr;
1086
1087         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1088         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1089         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1090         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1091         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1092         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1093         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1094         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1095         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1096         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1097         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1098         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1099         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1100         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1101                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1102 }
1103
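/*
 * Check the CP firmware versions per ASIC and set the *_fw_write_wait flags
 * when the ME/MEC firmware is new enough for the write-then-wait path.
 */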
1104 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1105 {
1106         adev->gfx.me_fw_write_wait = false;
1107         adev->gfx.mec_fw_write_wait = false;
1108
1109         if ((adev->gfx.mec_fw_version < 0x000001a5) ||
1110             (adev->gfx.mec_feature_version < 46) ||
1111             (adev->gfx.pfp_fw_version < 0x000000b7) ||
1112             (adev->gfx.pfp_feature_version < 46))
1113                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1114
1115         switch (adev->asic_type) {
1116         case CHIP_VEGA10:
1117                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1118                     (adev->gfx.me_feature_version >= 42) &&
1119                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1120                     (adev->gfx.pfp_feature_version >= 42))
1121                         adev->gfx.me_fw_write_wait = true;
1122
1123                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1124                     (adev->gfx.mec_feature_version >= 42))
1125                         adev->gfx.mec_fw_write_wait = true;
1126                 break;
1127         case CHIP_VEGA12:
1128                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1129                     (adev->gfx.me_feature_version >= 44) &&
1130                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1131                     (adev->gfx.pfp_feature_version >= 44))
1132                         adev->gfx.me_fw_write_wait = true;
1133
1134                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1135                     (adev->gfx.mec_feature_version >= 44))
1136                         adev->gfx.mec_fw_write_wait = true;
1137                 break;
1138         case CHIP_VEGA20:
1139                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1140                     (adev->gfx.me_feature_version >= 44) &&
1141                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1142                     (adev->gfx.pfp_feature_version >= 44))
1143                         adev->gfx.me_fw_write_wait = true;
1144
1145                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1146                     (adev->gfx.mec_feature_version >= 44))
1147                         adev->gfx.mec_fw_write_wait = true;
1148                 break;
1149         case CHIP_RAVEN:
1150                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1151                     (adev->gfx.me_feature_version >= 42) &&
1152                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1153                     (adev->gfx.pfp_feature_version >= 42))
1154                         adev->gfx.me_fw_write_wait = true;
1155
1156                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1157                     (adev->gfx.mec_feature_version >= 42))
1158                         adev->gfx.mec_fw_write_wait = true;
1159                 break;
1160         default:
1161                 break;
1162         }
1163 }
1164
1165 struct amdgpu_gfxoff_quirk {
1166         u16 chip_vendor;
1167         u16 chip_device;
1168         u16 subsys_vendor;
1169         u16 subsys_device;
1170         u8 revision;
1171 };
1172
1173 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1174         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1175         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1176         { 0, 0, 0, 0, 0 },
1177 };
1178
1179 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1180 {
1181         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1182
1183         while (p && p->chip_device != 0) {
1184                 if (pdev->vendor == p->chip_vendor &&
1185                     pdev->device == p->chip_device &&
1186                     pdev->subsystem_vendor == p->subsys_vendor &&
1187                     pdev->subsystem_device == p->subsys_device &&
1188                     pdev->revision == p->revision) {
1189                         return true;
1190                 }
1191                 ++p;
1192         }
1193         return false;
1194 }
1195
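/* Raven parts with SMU firmware 0x41e2b or newer use the "kicker" RLC firmware. */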
1196 static bool is_raven_kicker(struct amdgpu_device *adev)
1197 {
1198         if (adev->pm.fw_version >= 0x41e2b)
1199                 return true;
1200         else
1201                 return false;
1202 }
1203
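/*
 * Disable GFXOFF on quirked boards and on Raven parts whose RLC firmware
 * cannot support it; otherwise enable the matching GFX powergating flags.
 */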
1204 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1205 {
1206         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1207                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1208
1209         switch (adev->asic_type) {
1210         case CHIP_VEGA10:
1211         case CHIP_VEGA12:
1212         case CHIP_VEGA20:
1213                 break;
1214         case CHIP_RAVEN:
1215                 if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) &&
1216                     ((!is_raven_kicker(adev) &&
1217                       adev->gfx.rlc_fw_version < 531) ||
1218                      (adev->gfx.rlc_feature_version < 1) ||
1219                      !adev->gfx.rlc.is_rlc_v2_1))
1220                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1221
1222                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1223                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1224                                 AMD_PG_SUPPORT_CP |
1225                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1226                 break;
1227         case CHIP_RENOIR:
1228                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1229                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1230                                 AMD_PG_SUPPORT_CP |
1231                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1232                 break;
1233         default:
1234                 break;
1235         }
1236 }
1237
1238 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1239                                           const char *chip_name)
1240 {
1241         char fw_name[30];
1242         int err;
1243         struct amdgpu_firmware_info *info = NULL;
1244         const struct common_firmware_header *header = NULL;
1245         const struct gfx_firmware_header_v1_0 *cp_hdr;
1246
1247         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1248         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1249         if (err)
1250                 goto out;
1251         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1252         if (err)
1253                 goto out;
1254         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1255         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1256         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1257
1258         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1259         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1260         if (err)
1261                 goto out;
1262         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1263         if (err)
1264                 goto out;
1265         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1266         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1267         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1268
1269         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1270         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1271         if (err)
1272                 goto out;
1273         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1274         if (err)
1275                 goto out;
1276         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1277         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1278         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1279
1280         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1281                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1282                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1283                 info->fw = adev->gfx.pfp_fw;
1284                 header = (const struct common_firmware_header *)info->fw->data;
1285                 adev->firmware.fw_size +=
1286                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1287
1288                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1289                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1290                 info->fw = adev->gfx.me_fw;
1291                 header = (const struct common_firmware_header *)info->fw->data;
1292                 adev->firmware.fw_size +=
1293                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1294
1295                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1296                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1297                 info->fw = adev->gfx.ce_fw;
1298                 header = (const struct common_firmware_header *)info->fw->data;
1299                 adev->firmware.fw_size +=
1300                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1301         }
1302
1303 out:
1304         if (err) {
1305                 dev_err(adev->dev,
1306                         "gfx9: Failed to load firmware \"%s\"\n",
1307                         fw_name);
1308                 release_firmware(adev->gfx.pfp_fw);
1309                 adev->gfx.pfp_fw = NULL;
1310                 release_firmware(adev->gfx.me_fw);
1311                 adev->gfx.me_fw = NULL;
1312                 release_firmware(adev->gfx.ce_fw);
1313                 adev->gfx.ce_fw = NULL;
1314         }
1315         return err;
1316 }
1317
1318 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1319                                           const char *chip_name)
1320 {
1321         char fw_name[30];
1322         int err;
1323         struct amdgpu_firmware_info *info = NULL;
1324         const struct common_firmware_header *header = NULL;
1325         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1326         unsigned int *tmp = NULL;
1327         unsigned int i = 0;
1328         uint16_t version_major;
1329         uint16_t version_minor;
1330         uint32_t smu_version;
1331
1332         /*
1333          * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1334          * instead of picasso_rlc.bin.
1335          * Detection:
1336          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1337          *          or revision >= 0xD8 && revision <= 0xDF
1338          * otherwise it is PCO FP5
1339          */
1340         if (!strcmp(chip_name, "picasso") &&
1341                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1342                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1343                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1344         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1345                 (smu_version >= 0x41e2b))
1346                 /* The SMC is loaded by the SBIOS on APUs, so the SMU
1347                  * version can be queried directly here.
1348                  */
1349                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1350         else
1351                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1352         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1353         if (err)
1354                 goto out;
1355         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
             if (err)
                     goto out;
1356         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1357
1358         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1359         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1360         if (version_major == 2 && version_minor == 1)
1361                 adev->gfx.rlc.is_rlc_v2_1 = true;
1362
1363         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1364         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1365         adev->gfx.rlc.save_and_restore_offset =
1366                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1367         adev->gfx.rlc.clear_state_descriptor_offset =
1368                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1369         adev->gfx.rlc.avail_scratch_ram_locations =
1370                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1371         adev->gfx.rlc.reg_restore_list_size =
1372                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1373         adev->gfx.rlc.reg_list_format_start =
1374                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1375         adev->gfx.rlc.reg_list_format_separate_start =
1376                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1377         adev->gfx.rlc.starting_offsets_start =
1378                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1379         adev->gfx.rlc.reg_list_format_size_bytes =
1380                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1381         adev->gfx.rlc.reg_list_size_bytes =
1382                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1383         adev->gfx.rlc.register_list_format =
1384                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1385                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1386         if (!adev->gfx.rlc.register_list_format) {
1387                 err = -ENOMEM;
1388                 goto out;
1389         }
1390
1391         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1392                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1393         for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1394                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1395
1396         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1397
1398         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1399                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1400         for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1401                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1402
1403         if (adev->gfx.rlc.is_rlc_v2_1)
1404                 gfx_v9_0_init_rlc_ext_microcode(adev);
1405
1406         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1407                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1408                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1409                 info->fw = adev->gfx.rlc_fw;
1410                 header = (const struct common_firmware_header *)info->fw->data;
1411                 adev->firmware.fw_size +=
1412                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1413
1414                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1415                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1416                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1417                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1418                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1419                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1420                         info->fw = adev->gfx.rlc_fw;
1421                         adev->firmware.fw_size +=
1422                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1423
1424                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1425                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1426                         info->fw = adev->gfx.rlc_fw;
1427                         adev->firmware.fw_size +=
1428                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1429
1430                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1431                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1432                         info->fw = adev->gfx.rlc_fw;
1433                         adev->firmware.fw_size +=
1434                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1435                 }
1436         }
1437
1438 out:
1439         if (err) {
1440                 dev_err(adev->dev,
1441                         "gfx9: Failed to load firmware \"%s\"\n",
1442                         fw_name);
1443                 release_firmware(adev->gfx.rlc_fw);
1444                 adev->gfx.rlc_fw = NULL;
1445         }
1446         return err;
1447 }
1448
1449 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1450                                           const char *chip_name)
1451 {
1452         char fw_name[30];
1453         int err;
1454         struct amdgpu_firmware_info *info = NULL;
1455         const struct common_firmware_header *header = NULL;
1456         const struct gfx_firmware_header_v1_0 *cp_hdr;
1457
1458         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1459         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1460         if (err)
1461                 goto out;
1462         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1463         if (err)
1464                 goto out;
1465         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1466         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1467         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1468
1469
1470         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1471         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1472         if (!err) {
1473                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1474                 if (err)
1475                         goto out;
1476                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1477                         adev->gfx.mec2_fw->data;
1478                 adev->gfx.mec2_fw_version =
1479                         le32_to_cpu(cp_hdr->header.ucode_version);
1480                 adev->gfx.mec2_feature_version =
1481                         le32_to_cpu(cp_hdr->ucode_feature_version);
1482         } else {
1483                 err = 0;
1484                 adev->gfx.mec2_fw = NULL;
1485         }
1486
1487         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1488                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1489                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1490                 info->fw = adev->gfx.mec_fw;
1491                 header = (const struct common_firmware_header *)info->fw->data;
1492                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1493                 adev->firmware.fw_size +=
1494                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1495
1496                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1497                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1498                 info->fw = adev->gfx.mec_fw;
1499                 adev->firmware.fw_size +=
1500                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1501
1502                 if (adev->gfx.mec2_fw) {
1503                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1504                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1505                         info->fw = adev->gfx.mec2_fw;
1506                         header = (const struct common_firmware_header *)info->fw->data;
1507                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1508                         adev->firmware.fw_size +=
1509                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1510
1511                         /* TODO: determine if MEC2 JT FW loading can be removed
1512                          * for all GFX v9 ASICs and above */
1513                         if (adev->asic_type != CHIP_ARCTURUS &&
1514                             adev->asic_type != CHIP_RENOIR) {
1515                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1516                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1517                                 info->fw = adev->gfx.mec2_fw;
1518                                 adev->firmware.fw_size +=
1519                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1520                                         PAGE_SIZE);
1521                         }
1522                 }
1523         }
1524
1525 out:
1526         gfx_v9_0_check_if_need_gfxoff(adev);
1527         gfx_v9_0_check_fw_write_wait(adev);
1528         if (err) {
1529                 dev_err(adev->dev,
1530                         "gfx9: Failed to load firmware \"%s\"\n",
1531                         fw_name);
1532                 release_firmware(adev->gfx.mec_fw);
1533                 adev->gfx.mec_fw = NULL;
1534                 release_firmware(adev->gfx.mec2_fw);
1535                 adev->gfx.mec2_fw = NULL;
1536         }
1537         return err;
1538 }
1539
1540 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1541 {
1542         const char *chip_name;
1543         int r;
1544
1545         DRM_DEBUG("\n");
1546
1547         switch (adev->asic_type) {
1548         case CHIP_VEGA10:
1549                 chip_name = "vega10";
1550                 break;
1551         case CHIP_VEGA12:
1552                 chip_name = "vega12";
1553                 break;
1554         case CHIP_VEGA20:
1555                 chip_name = "vega20";
1556                 break;
1557         case CHIP_RAVEN:
1558                 if (adev->rev_id >= 8)
1559                         chip_name = "raven2";
1560                 else if (adev->pdev->device == 0x15d8)
1561                         chip_name = "picasso";
1562                 else
1563                         chip_name = "raven";
1564                 break;
1565         case CHIP_ARCTURUS:
1566                 chip_name = "arcturus";
1567                 break;
1568         case CHIP_RENOIR:
1569                 chip_name = "renoir";
1570                 break;
1571         default:
1572                 BUG();
1573         }
1574
1575         /* No CPG in Arcturus */
1576         if (adev->asic_type != CHIP_ARCTURUS) {
1577                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1578                 if (r)
1579                         return r;
1580         }
1581
1582         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1583         if (r)
1584                 return r;
1585
1586         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1587         if (r)
1588                 return r;
1589
1590         return r;
1591 }
1592
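/*
 * Return the number of dwords needed for the clear-state buffer built from
 * the gfx9 clear-state section definitions.
 */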
1593 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1594 {
1595         u32 count = 0;
1596         const struct cs_section_def *sect = NULL;
1597         const struct cs_extent_def *ext = NULL;
1598
1599         /* begin clear state */
1600         count += 2;
1601         /* context control state */
1602         count += 3;
1603
1604         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1605                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1606                         if (sect->id == SECT_CONTEXT)
1607                                 count += 2 + ext->reg_count;
1608                         else
1609                                 return 0;
1610                 }
1611         }
1612
1613         /* end clear state */
1614         count += 2;
1615         /* clear state */
1616         count += 2;
1617
1618         return count;
1619 }
1620
1621 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1622                                     volatile u32 *buffer)
1623 {
1624         u32 count = 0, i;
1625         const struct cs_section_def *sect = NULL;
1626         const struct cs_extent_def *ext = NULL;
1627
1628         if (adev->gfx.rlc.cs_data == NULL)
1629                 return;
1630         if (buffer == NULL)
1631                 return;
1632
1633         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1634         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1635
1636         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1637         buffer[count++] = cpu_to_le32(0x80000000);
1638         buffer[count++] = cpu_to_le32(0x80000000);
1639
1640         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1641                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1642                         if (sect->id == SECT_CONTEXT) {
1643                                 buffer[count++] =
1644                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1645                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1646                                                 PACKET3_SET_CONTEXT_REG_START);
1647                                 for (i = 0; i < ext->reg_count; i++)
1648                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1649                         } else {
1650                                 return;
1651                         }
1652                 }
1653         }
1654
1655         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1656         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1657
1658         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1659         buffer[count++] = cpu_to_le32(0);
1660 }
1661
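/*
 * Build the per-SE/SH always-on CU bitmaps used by RLC powergating:
 * 4 CUs stay on for APUs, 8 for Vega12 and 12 otherwise.
 */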
1662 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1663 {
1664         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1665         uint32_t pg_always_on_cu_num = 2;
1666         uint32_t always_on_cu_num;
1667         uint32_t i, j, k;
1668         uint32_t mask, cu_bitmap, counter;
1669
1670         if (adev->flags & AMD_IS_APU)
1671                 always_on_cu_num = 4;
1672         else if (adev->asic_type == CHIP_VEGA12)
1673                 always_on_cu_num = 8;
1674         else
1675                 always_on_cu_num = 12;
1676
1677         mutex_lock(&adev->grbm_idx_mutex);
1678         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1679                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1680                         mask = 1;
1681                         cu_bitmap = 0;
1682                         counter = 0;
1683                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1684
1685                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1686                                 if (cu_info->bitmap[i][j] & mask) {
1687                                         if (counter == pg_always_on_cu_num)
1688                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1689                                         if (counter < always_on_cu_num)
1690                                                 cu_bitmap |= mask;
1691                                         else
1692                                                 break;
1693                                         counter++;
1694                                 }
1695                                 mask <<= 1;
1696                         }
1697
1698                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1699                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1700                 }
1701         }
1702         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1703         mutex_unlock(&adev->grbm_idx_mutex);
1704 }
1705
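/* Program the RLC load-balancing (LBPW) thresholds and CU masks for Raven. */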
1706 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1707 {
1708         uint32_t data;
1709
1710         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1711         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1712         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1713         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1714         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1715
1716         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1717         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1718
1719         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1720         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1721
1722         mutex_lock(&adev->grbm_idx_mutex);
1723         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1724         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1725         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1726
1727         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1728         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1729         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1730         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1731         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1732
1733         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1734         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1735         data &= 0x0000FFFF;
1736         data |= 0x00C00000;
1737         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1738
1739         /*
1740          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1741          * programmed in gfx_v9_0_init_always_on_cu_mask()
1742          */
1743
1744         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1745          * but is used here for RLC_LB_CNTL configuration */
1746         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1747         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1748         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1749         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1750         mutex_unlock(&adev->grbm_idx_mutex);
1751
1752         gfx_v9_0_init_always_on_cu_mask(adev);
1753 }
1754
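/* Vega20 variant of the LBPW setup, with different thresholds and counter max. */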
1755 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1756 {
1757         uint32_t data;
1758
1759         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1760         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1761         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1762         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1763         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1764
1765         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1766         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1767
1768         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1769         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1770
1771         mutex_lock(&adev->grbm_idx_mutex);
1772         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1773         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1774         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1775
1776         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1777         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1778         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1779         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1780         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1781
1782         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1783         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1784         data &= 0x0000FFFF;
1785         data |= 0x00C00000;
1786         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1787
1788         /*
1789          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1790          * programmed in gfx_v9_0_init_always_on_cu_mask()
1791          */
1792
1793         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1794          * but is used here for RLC_LB_CNTL configuration */
1795         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1796         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1797         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1798         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1799         mutex_unlock(&adev->grbm_idx_mutex);
1800
1801         gfx_v9_0_init_always_on_cu_mask(adev);
1802 }
1803
1804 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1805 {
1806         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1807 }
1808
1809 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1810 {
1811         return 5;
1812 }
1813
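/*
 * Allocate the RLC clear-state buffer and, on Raven/Renoir, the CP jump
 * table, then apply the per-ASIC LBPW configuration.
 */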
1814 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1815 {
1816         const struct cs_section_def *cs_data;
1817         int r;
1818
1819         adev->gfx.rlc.cs_data = gfx9_cs_data;
1820
1821         cs_data = adev->gfx.rlc.cs_data;
1822
1823         if (cs_data) {
1824                 /* init clear state block */
1825                 r = amdgpu_gfx_rlc_init_csb(adev);
1826                 if (r)
1827                         return r;
1828         }
1829
1830         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1831                 /* TODO: double check the cp_table_size for RV */
1832                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1833                 r = amdgpu_gfx_rlc_init_cpt(adev);
1834                 if (r)
1835                         return r;
1836         }
1837
1838         switch (adev->asic_type) {
1839         case CHIP_RAVEN:
1840                 gfx_v9_0_init_lbpw(adev);
1841                 break;
1842         case CHIP_VEGA20:
1843                 gfx_v9_4_init_lbpw(adev);
1844                 break;
1845         default:
1846                 break;
1847         }
1848
1849         return 0;
1850 }
1851
1852 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1853 {
1854         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1855         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1856 }
1857
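/*
 * Allocate the MEC HPD/EOP buffer in VRAM for the acquired compute queues
 * and copy the MEC microcode into a GTT buffer object.
 */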
1858 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1859 {
1860         int r;
1861         u32 *hpd;
1862         const __le32 *fw_data;
1863         unsigned fw_size;
1864         u32 *fw;
1865         size_t mec_hpd_size;
1866
1867         const struct gfx_firmware_header_v1_0 *mec_hdr;
1868
1869         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1870
1871         /* take ownership of the relevant compute queues */
1872         amdgpu_gfx_compute_queue_acquire(adev);
1873         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1874
1875         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1876                                       AMDGPU_GEM_DOMAIN_VRAM,
1877                                       &adev->gfx.mec.hpd_eop_obj,
1878                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1879                                       (void **)&hpd);
1880         if (r) {
1881                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1882                 gfx_v9_0_mec_fini(adev);
1883                 return r;
1884         }
1885
1886         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1887
1888         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1889         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1890
1891         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1892
1893         fw_data = (const __le32 *)
1894                 (adev->gfx.mec_fw->data +
1895                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1896         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1897
1898         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1899                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1900                                       &adev->gfx.mec.mec_fw_obj,
1901                                       &adev->gfx.mec.mec_fw_gpu_addr,
1902                                       (void **)&fw);
1903         if (r) {
1904                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1905                 gfx_v9_0_mec_fini(adev);
1906                 return r;
1907         }
1908
1909         memcpy(fw, fw_data, fw_size);
1910
1911         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1912         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1913
1914         return 0;
1915 }
1916
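/*
 * Read one SQ indexed register for the given SIMD/wave through
 * SQ_IND_INDEX / SQ_IND_DATA.
 */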
1917 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1918 {
1919         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1920                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1921                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1922                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1923                 (SQ_IND_INDEX__FORCE_READ_MASK));
1924         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1925 }
1926
1927 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1928                            uint32_t wave, uint32_t thread,
1929                            uint32_t regno, uint32_t num, uint32_t *out)
1930 {
1931         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1932                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1933                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1934                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1935                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1936                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1937                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1938         while (num--)
1939                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1940 }
1941
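/*
 * Snapshot the per-wave status registers (STATUS, PC, EXEC, HW_ID, ...) into
 * dst[]; the leading 1 identifies the "type 1" wave-data layout.
 */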
1942 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1943 {
1944         /* type 1 wave data */
1945         dst[(*no_fields)++] = 1;
1946         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1947         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1948         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1949         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1950         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1951         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1952         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1953         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1954         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1955         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1956         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1957         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1958         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1959         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1960 }
1961
1962 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1963                                      uint32_t wave, uint32_t start,
1964                                      uint32_t size, uint32_t *dst)
1965 {
1966         wave_read_regs(
1967                 adev, simd, wave, 0,
1968                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1969 }
1970
1971 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1972                                      uint32_t wave, uint32_t thread,
1973                                      uint32_t start, uint32_t size,
1974                                      uint32_t *dst)
1975 {
1976         wave_read_regs(
1977                 adev, simd, wave, thread,
1978                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1979 }
1980
1981 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1982                                   u32 me, u32 pipe, u32 q, u32 vm)
1983 {
1984         soc15_grbm_select(adev, me, pipe, q, vm);
1985 }
1986
1987 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1988         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1989         .select_se_sh = &gfx_v9_0_select_se_sh,
1990         .read_wave_data = &gfx_v9_0_read_wave_data,
1991         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1992         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1993         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1994         .ras_error_inject = &gfx_v9_0_ras_error_inject,
1995         .query_ras_error_count = &gfx_v9_0_query_ras_error_count
1996 };
1997
1998 static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
1999         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2000         .select_se_sh = &gfx_v9_0_select_se_sh,
2001         .read_wave_data = &gfx_v9_0_read_wave_data,
2002         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2003         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2004         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2005         .ras_error_inject = &gfx_v9_4_ras_error_inject,
2006         .query_ras_error_count = &gfx_v9_4_query_ras_error_count
2007 };
2008
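/*
 * Set the per-ASIC gfx configuration (context count, SC FIFO sizes, golden
 * GB_ADDR_CONFIG) and decode GB_ADDR_CONFIG into its individual fields.
 */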
2009 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2010 {
2011         u32 gb_addr_config;
2012         int err;
2013
2014         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2015
2016         switch (adev->asic_type) {
2017         case CHIP_VEGA10:
2018                 adev->gfx.config.max_hw_contexts = 8;
2019                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2020                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2021                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2022                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2023                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2024                 break;
2025         case CHIP_VEGA12:
2026                 adev->gfx.config.max_hw_contexts = 8;
2027                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2028                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2029                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2030                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2031                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2032                 DRM_INFO("fix gfx.config for vega12\n");
2033                 break;
2034         case CHIP_VEGA20:
2035                 adev->gfx.config.max_hw_contexts = 8;
2036                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2037                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2038                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2039                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2040                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2041                 gb_addr_config &= ~0xf3e777ff;
2042                 gb_addr_config |= 0x22014042;
2043                 /* check vbios table if gpu info is not available */
2044                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2045                 if (err)
2046                         return err;
2047                 break;
2048         case CHIP_RAVEN:
2049                 adev->gfx.config.max_hw_contexts = 8;
2050                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2051                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2052                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2053                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2054                 if (adev->rev_id >= 8)
2055                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2056                 else
2057                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2058                 break;
2059         case CHIP_ARCTURUS:
2060                 adev->gfx.funcs = &gfx_v9_4_gfx_funcs;
2061                 adev->gfx.config.max_hw_contexts = 8;
2062                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2063                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2064                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2065                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2066                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2067                 gb_addr_config &= ~0xf3e777ff;
2068                 gb_addr_config |= 0x22014042;
2069                 break;
2070         case CHIP_RENOIR:
2071                 adev->gfx.config.max_hw_contexts = 8;
2072                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2073                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2074                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2075                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2076                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2077                 gb_addr_config &= ~0xf3e777ff;
2078                 gb_addr_config |= 0x22010042;
2079                 break;
2080         default:
2081                 BUG();
2082                 break;
2083         }
2084
2085         adev->gfx.config.gb_addr_config = gb_addr_config;
2086
2087         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2088                         REG_GET_FIELD(
2089                                         adev->gfx.config.gb_addr_config,
2090                                         GB_ADDR_CONFIG,
2091                                         NUM_PIPES);
2092
2093         adev->gfx.config.max_tile_pipes =
2094                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2095
2096         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2097                         REG_GET_FIELD(
2098                                         adev->gfx.config.gb_addr_config,
2099                                         GB_ADDR_CONFIG,
2100                                         NUM_BANKS);
2101         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2102                         REG_GET_FIELD(
2103                                         adev->gfx.config.gb_addr_config,
2104                                         GB_ADDR_CONFIG,
2105                                         MAX_COMPRESSED_FRAGS);
2106         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2107                         REG_GET_FIELD(
2108                                         adev->gfx.config.gb_addr_config,
2109                                         GB_ADDR_CONFIG,
2110                                         NUM_RB_PER_SE);
2111         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2112                         REG_GET_FIELD(
2113                                         adev->gfx.config.gb_addr_config,
2114                                         GB_ADDR_CONFIG,
2115                                         NUM_SHADER_ENGINES);
2116         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2117                         REG_GET_FIELD(
2118                                         adev->gfx.config.gb_addr_config,
2119                                         GB_ADDR_CONFIG,
2120                                         PIPE_INTERLEAVE_SIZE));
2121
2122         return 0;
2123 }
2124
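/*
 * Initialize one compute ring: map it onto a MEC/pipe/queue, assign its
 * doorbell and HPD/EOP slice, and wire up the matching EOP interrupt.
 */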
2125 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2126                                       int mec, int pipe, int queue)
2127 {
2128         int r;
2129         unsigned irq_type;
2130         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2131
2133
2134         /* mec0 is me1 */
2135         ring->me = mec + 1;
2136         ring->pipe = pipe;
2137         ring->queue = queue;
2138
2139         ring->ring_obj = NULL;
2140         ring->use_doorbell = true;
2141         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2142         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2143                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2144         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2145
2146         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2147                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2148                 + ring->pipe;
2149
2150         /* type-2 packets are deprecated on MEC, use type-3 instead */
2151         r = amdgpu_ring_init(adev, ring, 1024,
2152                              &adev->gfx.eop_irq, irq_type);
2153         if (r)
2154                 return r;
2155
2156
2157         return 0;
2158 }
2159
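/*
 * sw_init: register the CP interrupt sources, load microcode, create the
 * RLC/MEC buffer objects, and set up the gfx, compute and KIQ rings.
 */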
2160 static int gfx_v9_0_sw_init(void *handle)
2161 {
2162         int i, j, k, r, ring_id;
2163         struct amdgpu_ring *ring;
2164         struct amdgpu_kiq *kiq;
2165         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2166
2167         switch (adev->asic_type) {
2168         case CHIP_VEGA10:
2169         case CHIP_VEGA12:
2170         case CHIP_VEGA20:
2171         case CHIP_RAVEN:
2172         case CHIP_ARCTURUS:
2173         case CHIP_RENOIR:
2174                 adev->gfx.mec.num_mec = 2;
2175                 break;
2176         default:
2177                 adev->gfx.mec.num_mec = 1;
2178                 break;
2179         }
2180
2181         adev->gfx.mec.num_pipe_per_mec = 4;
2182         adev->gfx.mec.num_queue_per_pipe = 8;
2183
2184         /* EOP Event */
2185         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2186         if (r)
2187                 return r;
2188
2189         /* Privileged reg */
2190         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2191                               &adev->gfx.priv_reg_irq);
2192         if (r)
2193                 return r;
2194
2195         /* Privileged inst */
2196         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2197                               &adev->gfx.priv_inst_irq);
2198         if (r)
2199                 return r;
2200
2201         /* ECC error */
2202         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2203                               &adev->gfx.cp_ecc_error_irq);
2204         if (r)
2205                 return r;
2206
2207         /* FUE error */
2208         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2209                               &adev->gfx.cp_ecc_error_irq);
2210         if (r)
2211                 return r;
2212
2213         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2214
2215         gfx_v9_0_scratch_init(adev);
2216
2217         r = gfx_v9_0_init_microcode(adev);
2218         if (r) {
2219                 DRM_ERROR("Failed to load gfx firmware!\n");
2220                 return r;
2221         }
2222
2223         r = adev->gfx.rlc.funcs->init(adev);
2224         if (r) {
2225                 DRM_ERROR("Failed to init rlc BOs!\n");
2226                 return r;
2227         }
2228
2229         r = gfx_v9_0_mec_init(adev);
2230         if (r) {
2231                 DRM_ERROR("Failed to init MEC BOs!\n");
2232                 return r;
2233         }
2234
2235         /* set up the gfx ring */
2236         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2237                 ring = &adev->gfx.gfx_ring[i];
2238                 ring->ring_obj = NULL;
2239                 if (!i)
2240                         sprintf(ring->name, "gfx");
2241                 else
2242                         sprintf(ring->name, "gfx_%d", i);
2243                 ring->use_doorbell = true;
2244                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2245                 r = amdgpu_ring_init(adev, ring, 1024,
2246                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2247                 if (r)
2248                         return r;
2249         }
2250
2251         /* set up the compute queues - allocate horizontally across pipes */
2252         ring_id = 0;
2253         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2254                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2255                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2256                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2257                                         continue;
2258
2259                                 r = gfx_v9_0_compute_ring_init(adev,
2260                                                                ring_id,
2261                                                                i, k, j);
2262                                 if (r)
2263                                         return r;
2264
2265                                 ring_id++;
2266                         }
2267                 }
2268         }
2269
2270         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2271         if (r) {
2272                 DRM_ERROR("Failed to init KIQ BOs!\n");
2273                 return r;
2274         }
2275
2276         kiq = &adev->gfx.kiq;
2277         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2278         if (r)
2279                 return r;
2280
2281         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2282         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2283         if (r)
2284                 return r;
2285
2286         adev->gfx.ce_ram_size = 0x8000;
2287
2288         r = gfx_v9_0_gpu_early_init(adev);
2289         if (r)
2290                 return r;
2291
2292         return 0;
2293 }
2294
2295
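/*
 * Per-IP sw_fini: tear down everything created in gfx_v9_0_sw_init -
 * rings, MQDs, the KIQ, MEC and RLC buffers, and the cached microcode.
 */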
2296 static int gfx_v9_0_sw_fini(void *handle)
2297 {
2298         int i;
2299         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2300
2301         amdgpu_gfx_ras_fini(adev);
2302
2303         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2304                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2305         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2306                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2307
2308         amdgpu_gfx_mqd_sw_fini(adev);
2309         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2310         amdgpu_gfx_kiq_fini(adev);
2311
2312         gfx_v9_0_mec_fini(adev);
2313         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2314         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2315                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2316                                 &adev->gfx.rlc.cp_table_gpu_addr,
2317                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2318         }
2319         gfx_v9_0_free_microcode(adev);
2320
2321         return 0;
2322 }
2323
2325 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2326 {
2327         /* TODO */
2328 }
2329
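/*
 * Steer register accesses to a specific shader engine/array/instance via
 * GRBM_GFX_INDEX; 0xffffffff selects broadcast for that field.
 */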
2330 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2331 {
2332         u32 data;
2333
2334         if (instance == 0xffffffff)
2335                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2336         else
2337                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2338
2339         if (se_num == 0xffffffff)
2340                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2341         else
2342                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2343
2344         if (sh_num == 0xffffffff)
2345                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2346         else
2347                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2348
2349         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2350 }
2351
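/*
 * Return the bitmap of render backends that are enabled for the
 * currently selected SE/SH.
 */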
2352 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2353 {
2354         u32 data, mask;
2355
2356         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2357         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2358
2359         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2360         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2361
2362         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2363                                          adev->gfx.config.max_sh_per_se);
2364
2365         return (~data) & mask;
2366 }
2367
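/*
 * Collect the active render backend bitmap across all SEs/SHs into
 * adev->gfx.config.backend_enable_mask and count the enabled RBs.
 */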
2368 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2369 {
2370         int i, j;
2371         u32 data;
2372         u32 active_rbs = 0;
2373         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2374                                         adev->gfx.config.max_sh_per_se;
2375
2376         mutex_lock(&adev->grbm_idx_mutex);
2377         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2378                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2379                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2380                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2381                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2382                                                rb_bitmap_width_per_sh);
2383                 }
2384         }
2385         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2386         mutex_unlock(&adev->grbm_idx_mutex);
2387
2388         adev->gfx.config.backend_enable_mask = active_rbs;
2389         adev->gfx.config.num_rbs = hweight32(active_rbs);
2390 }
2391
2392 #define DEFAULT_SH_MEM_BASES    (0x6000)
2393 #define FIRST_COMPUTE_VMID      (8)
2394 #define LAST_COMPUTE_VMID       (16)
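/*
 * Configure the SH_MEM apertures for the compute VMIDs (8-15) and clear
 * their GDS/GWS/OA allocations; the firmware re-enables those for the
 * target VMIDs.
 */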
2395 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2396 {
2397         int i;
2398         uint32_t sh_mem_config;
2399         uint32_t sh_mem_bases;
2400
2401         /*
2402          * Configure apertures:
2403          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2404          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2405          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2406          */
2407         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2408
2409         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2410                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2411                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2412
2413         mutex_lock(&adev->srbm_mutex);
2414         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2415                 soc15_grbm_select(adev, 0, 0, 0, i);
2416                 /* CP and shaders */
2417                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2418                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2419         }
2420         soc15_grbm_select(adev, 0, 0, 0, 0);
2421         mutex_unlock(&adev->srbm_mutex);
2422
2423         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2424          * access. These should be enabled by FW for target VMIDs. */
2425         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2426                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2427                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2428                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2429                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2430         }
2431 }
2432
2433 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2434 {
2435         int vmid;
2436
2437         /*
2438          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2439          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2440          * the driver can enable them for graphics. VMID0 should maintain
2441          * access so that HWS firmware can save/restore entries.
2442          */
2443         for (vmid = 1; vmid < 16; vmid++) {
2444                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2445                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2446                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2447                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2448         }
2449 }
2450
2451 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2452 {
2453         uint32_t tmp;
2454
2455         switch (adev->asic_type) {
2456         case CHIP_ARCTURUS:
2457                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2458                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2459                                         DISABLE_BARRIER_WAITCNT, 1);
2460                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2461                 break;
2462         default:
2463                 break;
2464         }
2465 }
2466
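/*
 * Program the static GFX state at hw init time: GRBM read timeout, render
 * backends, per-VMID SH_MEM apertures and the compute/GDS VMID defaults.
 */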
2467 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2468 {
2469         u32 tmp;
2470         int i;
2471
2472         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2473
2474         gfx_v9_0_tiling_mode_table_init(adev);
2475
2476         gfx_v9_0_setup_rb(adev);
2477         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2478         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2479
2480         /* XXX SH_MEM regs */
2481         /* where to put LDS, scratch, GPUVM in FSA64 space */
2482         mutex_lock(&adev->srbm_mutex);
2483         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2484                 soc15_grbm_select(adev, 0, 0, 0, i);
2485                 /* CP and shaders */
2486                 if (i == 0) {
2487                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2488                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2489                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2490                                             !!amdgpu_noretry);
2491                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2492                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2493                 } else {
2494                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2495                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2496                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2497                                             !!amdgpu_noretry);
2498                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2499                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2500                                 (adev->gmc.private_aperture_start >> 48));
2501                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2502                                 (adev->gmc.shared_aperture_start >> 48));
2503                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2504                 }
2505         }
2506         soc15_grbm_select(adev, 0, 0, 0, 0);
2507
2508         mutex_unlock(&adev->srbm_mutex);
2509
2510         gfx_v9_0_init_compute_vmid(adev);
2511         gfx_v9_0_init_gds_vmid(adev);
2512         gfx_v9_0_init_sq_config(adev);
2513 }
2514
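/*
 * Wait for the RLC serdes CU masters on every SE/SH, and then the non-CU
 * masters, to go idle (bounded by adev->usec_timeout).
 */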
2515 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2516 {
2517         u32 i, j, k;
2518         u32 mask;
2519
2520         mutex_lock(&adev->grbm_idx_mutex);
2521         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2522                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2523                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2524                         for (k = 0; k < adev->usec_timeout; k++) {
2525                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2526                                         break;
2527                                 udelay(1);
2528                         }
2529                         if (k == adev->usec_timeout) {
2530                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2531                                                       0xffffffff, 0xffffffff);
2532                                 mutex_unlock(&adev->grbm_idx_mutex);
2533                                 DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2534                                          i, j);
2535                                 return;
2536                         }
2537                 }
2538         }
2539         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2540         mutex_unlock(&adev->grbm_idx_mutex);
2541
2542         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2543                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2544                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2545                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2546         for (k = 0; k < adev->usec_timeout; k++) {
2547                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2548                         break;
2549                 udelay(1);
2550         }
2551 }
2552
2553 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2554                                                bool enable)
2555 {
2556         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2557
2558         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2559         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2560         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2561         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2562
2563         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2564 }
2565
2566 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2567 {
2568         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2569         /* csib */
2570         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2571                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2572         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2573                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2574         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2575                         adev->gfx.rlc.clear_state_size);
2576 }
2577
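/*
 * Walk the RLC register list format blob, recording where each indirect
 * block starts and which unique indirect registers it references.
 */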
2578 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2579                                 int indirect_offset,
2580                                 int list_size,
2581                                 int *unique_indirect_regs,
2582                                 int unique_indirect_reg_count,
2583                                 int *indirect_start_offsets,
2584                                 int *indirect_start_offsets_count,
2585                                 int max_start_offsets_count)
2586 {
2587         int idx;
2588
2589         for (; indirect_offset < list_size; indirect_offset++) {
2590                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2591                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2592                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2593
2594                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2595                         indirect_offset += 2;
2596
2597                         /* look for the matching index */
2598                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2599                                 if (unique_indirect_regs[idx] ==
2600                                         register_list_format[indirect_offset] ||
2601                                         !unique_indirect_regs[idx])
2602                                         break;
2603                         }
2604
2605                         BUG_ON(idx >= unique_indirect_reg_count);
2606
2607                         if (!unique_indirect_regs[idx])
2608                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2609
2610                         indirect_offset++;
2611                 }
2612         }
2613 }
2614
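/*
 * Program the RLC save/restore machine: copy the register restore table
 * into SRM ARAM and load the indirect register list, start offsets and
 * unique indirect registers into RLC scratch RAM.
 */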
2615 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2616 {
2617         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2618         int unique_indirect_reg_count = 0;
2619
2620         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2621         int indirect_start_offsets_count = 0;
2622
2623         int list_size = 0;
2624         int i = 0, j = 0;
2625         u32 tmp = 0;
2626
2627         u32 *register_list_format =
2628                 kmemdup(adev->gfx.rlc.register_list_format,
2629                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2630         if (!register_list_format)
2631                 return -ENOMEM;
2632
2633         /* setup unique_indirect_regs array and indirect_start_offsets array */
2634         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2635         gfx_v9_1_parse_ind_reg_list(register_list_format,
2636                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2637                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2638                                     unique_indirect_regs,
2639                                     unique_indirect_reg_count,
2640                                     indirect_start_offsets,
2641                                     &indirect_start_offsets_count,
2642                                     ARRAY_SIZE(indirect_start_offsets));
2643
2644         /* enable auto inc in case it is disabled */
2645         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2646         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2647         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2648
2649         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2650         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2651                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2652         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2653                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2654                         adev->gfx.rlc.register_restore[i]);
2655
2656         /* load indirect register */
2657         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2658                 adev->gfx.rlc.reg_list_format_start);
2659
2660         /* direct register portion */
2661         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2662                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2663                         register_list_format[i]);
2664
2665         /* indirect register portion */
2666         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2667                 if (register_list_format[i] == 0xFFFFFFFF) {
2668                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2669                         continue;
2670                 }
2671
2672                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2673                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2674
2675                 for (j = 0; j < unique_indirect_reg_count; j++) {
2676                         if (register_list_format[i] == unique_indirect_regs[j]) {
2677                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2678                                 break;
2679                         }
2680                 }
2681
2682                 BUG_ON(j >= unique_indirect_reg_count);
2683
2684                 i++;
2685         }
2686
2687         /* set save/restore list size */
2688         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2689         list_size = list_size >> 1;
2690         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2691                 adev->gfx.rlc.reg_restore_list_size);
2692         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2693
2694         /* write the starting offsets to RLC scratch ram */
2695         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2696                 adev->gfx.rlc.starting_offsets_start);
2697         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2698                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2699                        indirect_start_offsets[i]);
2700
2701         /* load unique indirect regs */
2702         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2703                 if (unique_indirect_regs[i] != 0) {
2704                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2705                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2706                                unique_indirect_regs[i] & 0x3FFFF);
2707
2708                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2709                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2710                                unique_indirect_regs[i] >> 20);
2711                 }
2712         }
2713
2714         kfree(register_list_format);
2715         return 0;
2716 }
2717
2718 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2719 {
2720         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2721 }
2722
2723 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2724                                              bool enable)
2725 {
2726         uint32_t data = 0;
2727         uint32_t default_data = 0;
2728
2729         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2730         if (enable) {
2731                 /* enable GFXIP control over CGPG */
2732                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2733                 if (default_data != data)
2734                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2735
2736                 /* update status */
2737                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2738                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2739                 if (default_data != data)
2740                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2741         } else {
2742                 /* restore GFXIP control over CGPG */
2743                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2744                 if (default_data != data)
2745                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2746         }
2747 }
2748
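/*
 * Set up the RLC power gating delays and idle thresholds and hand CGPG
 * control to the GFX IP when any GFX PG feature is enabled.
 */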
2749 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2750 {
2751         uint32_t data = 0;
2752
2753         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2754                               AMD_PG_SUPPORT_GFX_SMG |
2755                               AMD_PG_SUPPORT_GFX_DMG)) {
2756                 /* init IDLE_POLL_COUNT = 60 */
2757                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2758                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2759                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2760                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2761
2762                 /* init RLC PG Delay */
2763                 data = 0;
2764                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2765                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2766                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2767                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2768                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2769
2770                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2771                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2772                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2773                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2774
2775                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2776                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2777                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2778                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2779
2780                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2781                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2782
2783                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2784                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2785                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2786
2787                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2788         }
2789 }
2790
2791 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2792                                                 bool enable)
2793 {
2794         uint32_t data = 0;
2795         uint32_t default_data = 0;
2796
2797         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2798         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2799                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2800                              enable ? 1 : 0);
2801         if (default_data != data)
2802                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2803 }
2804
2805 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2806                                                 bool enable)
2807 {
2808         uint32_t data = 0;
2809         uint32_t default_data = 0;
2810
2811         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2812         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2813                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2814                              enable ? 1 : 0);
2815         if (default_data != data)
2816                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2817 }
2818
2819 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2820                                         bool enable)
2821 {
2822         uint32_t data = 0;
2823         uint32_t default_data = 0;
2824
2825         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2826         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2827                              CP_PG_DISABLE,
2828                              enable ? 0 : 1);
2829         if (default_data != data)
2830                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2831 }
2832
2833 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2834                                                 bool enable)
2835 {
2836         uint32_t data, default_data;
2837
2838         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2839         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2840                              GFX_POWER_GATING_ENABLE,
2841                              enable ? 1 : 0);
2842         if (default_data != data)
2843                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2844 }
2845
2846 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2847                                                 bool enable)
2848 {
2849         uint32_t data, default_data;
2850
2851         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2852         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2853                              GFX_PIPELINE_PG_ENABLE,
2854                              enable ? 1 : 0);
2855         if (default_data != data)
2856                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2857
2858         if (!enable)
2859                 /* read any GFX register to wake up GFX */
2860                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2861 }
2862
2863 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2864                                                        bool enable)
2865 {
2866         uint32_t data, default_data;
2867
2868         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2869         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2870                              STATIC_PER_CU_PG_ENABLE,
2871                              enable ? 1 : 0);
2872         if (default_data != data)
2873                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2874 }
2875
2876 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2877                                                 bool enable)
2878 {
2879         uint32_t data, default_data;
2880
2881         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2882         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2883                              DYN_PER_CU_PG_ENABLE,
2884                              enable ? 1 : 0);
2885         if (default_data != data)
2886                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2887 }
2888
2889 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2890 {
2891         gfx_v9_0_init_csb(adev);
2892
2893         /*
2894          * The RLC save/restore list is only supported from RLC v2_1
2895          * onwards, and it is required by the gfxoff feature.
2896          */
2897         if (adev->gfx.rlc.is_rlc_v2_1) {
2898                 if (adev->asic_type == CHIP_VEGA12 ||
2899                     (adev->asic_type == CHIP_RAVEN &&
2900                      adev->rev_id >= 8))
2901                         gfx_v9_1_init_rlc_save_restore_list(adev);
2902                 gfx_v9_0_enable_save_restore_machine(adev);
2903         }
2904
2905         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2906                               AMD_PG_SUPPORT_GFX_SMG |
2907                               AMD_PG_SUPPORT_GFX_DMG |
2908                               AMD_PG_SUPPORT_CP |
2909                               AMD_PG_SUPPORT_GDS |
2910                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2911                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2912                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2913                 gfx_v9_0_init_gfx_power_gating(adev);
2914         }
2915 }
2916
2917 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2918 {
2919         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2920         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2921         gfx_v9_0_wait_for_rlc_serdes(adev);
2922 }
2923
2924 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2925 {
2926         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2927         udelay(50);
2928         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2929         udelay(50);
2930 }
2931
2932 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2933 {
2934 #ifdef AMDGPU_RLC_DEBUG_RETRY
2935         u32 rlc_ucode_ver;
2936 #endif
2937
2938         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2939         udelay(50);
2940
2941         /* carrizo enables the cp interrupt after the cp is initialized */
2942         if (!(adev->flags & AMD_IS_APU)) {
2943                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2944                 udelay(50);
2945         }
2946
2947 #ifdef AMDGPU_RLC_DEBUG_RETRY
2948         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2949         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2950         if (rlc_ucode_ver == 0x108) {
2951                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2952                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2953                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2954                  * default is 0x9C4 to create a 100us interval */
2955                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2956                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2957                  * to disable the page fault retry interrupts, default is
2958                  * 0x100 (256) */
2959                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2960         }
2961 #endif
2962 }
2963
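/* Legacy (non-PSP) load of the RLC ucode through RLC_GPM_UCODE_ADDR/DATA. */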
2964 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2965 {
2966         const struct rlc_firmware_header_v2_0 *hdr;
2967         const __le32 *fw_data;
2968         unsigned i, fw_size;
2969
2970         if (!adev->gfx.rlc_fw)
2971                 return -EINVAL;
2972
2973         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2974         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2975
2976         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2977                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2978         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2979
2980         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2981                         RLCG_UCODE_LOADING_START_ADDRESS);
2982         for (i = 0; i < fw_size; i++)
2983                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2984         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2985
2986         return 0;
2987 }
2988
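/*
 * Bring the RLC back up: stop it, disable CGCG, re-init power gating,
 * load the RLC ucode when PSP loading is not used, and start it again.
 */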
2989 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2990 {
2991         int r;
2992
2993         if (amdgpu_sriov_vf(adev)) {
2994                 gfx_v9_0_init_csb(adev);
2995                 return 0;
2996         }
2997
2998         adev->gfx.rlc.funcs->stop(adev);
2999
3000         /* disable CG */
3001         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3002
3003         gfx_v9_0_init_pg(adev);
3004
3005         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3006                 /* legacy rlc firmware loading */
3007                 r = gfx_v9_0_rlc_load_microcode(adev);
3008                 if (r)
3009                         return r;
3010         }
3011
3012         switch (adev->asic_type) {
3013         case CHIP_RAVEN:
3014                 if (amdgpu_lbpw == 0)
3015                         gfx_v9_0_enable_lbpw(adev, false);
3016                 else
3017                         gfx_v9_0_enable_lbpw(adev, true);
3018                 break;
3019         case CHIP_VEGA20:
3020                 if (amdgpu_lbpw > 0)
3021                         gfx_v9_0_enable_lbpw(adev, true);
3022                 else
3023                         gfx_v9_0_enable_lbpw(adev, false);
3024                 break;
3025         default:
3026                 break;
3027         }
3028
3029         adev->gfx.rlc.funcs->start(adev);
3030
3031         return 0;
3032 }
3033
3034 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3035 {
3036         int i;
3037         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3038
3039         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3040         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3041         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3042         if (!enable) {
3043                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3044                         adev->gfx.gfx_ring[i].sched.ready = false;
3045         }
3046         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3047         udelay(50);
3048 }
3049
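/*
 * Legacy (non-PSP) load of the PFP, CE and ME microcode through their
 * respective ucode data ports.
 */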
3050 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3051 {
3052         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3053         const struct gfx_firmware_header_v1_0 *ce_hdr;
3054         const struct gfx_firmware_header_v1_0 *me_hdr;
3055         const __le32 *fw_data;
3056         unsigned i, fw_size;
3057
3058         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3059                 return -EINVAL;
3060
3061         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3062                 adev->gfx.pfp_fw->data;
3063         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3064                 adev->gfx.ce_fw->data;
3065         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3066                 adev->gfx.me_fw->data;
3067
3068         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3069         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3070         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3071
3072         gfx_v9_0_cp_gfx_enable(adev, false);
3073
3074         /* PFP */
3075         fw_data = (const __le32 *)
3076                 (adev->gfx.pfp_fw->data +
3077                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3078         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3079         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3080         for (i = 0; i < fw_size; i++)
3081                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3082         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3083
3084         /* CE */
3085         fw_data = (const __le32 *)
3086                 (adev->gfx.ce_fw->data +
3087                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3088         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3089         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3090         for (i = 0; i < fw_size; i++)
3091                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3092         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3093
3094         /* ME */
3095         fw_data = (const __le32 *)
3096                 (adev->gfx.me_fw->data +
3097                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3098         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3099         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3100         for (i = 0; i < fw_size; i++)
3101                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3102         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3103
3104         return 0;
3105 }
3106
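/*
 * Init the CP and emit the clear state buffer plus the initial context
 * control and uconfig state on the gfx ring.
 */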
3107 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3108 {
3109         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3110         const struct cs_section_def *sect = NULL;
3111         const struct cs_extent_def *ext = NULL;
3112         int r, i, tmp;
3113
3114         /* init the CP */
3115         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3116         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3117
3118         gfx_v9_0_cp_gfx_enable(adev, true);
3119
3120         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3121         if (r) {
3122                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3123                 return r;
3124         }
3125
3126         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3127         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3128
3129         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3130         amdgpu_ring_write(ring, 0x80000000);
3131         amdgpu_ring_write(ring, 0x80000000);
3132
3133         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3134                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3135                         if (sect->id == SECT_CONTEXT) {
3136                                 amdgpu_ring_write(ring,
3137                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3138                                                ext->reg_count));
3139                                 amdgpu_ring_write(ring,
3140                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3141                                 for (i = 0; i < ext->reg_count; i++)
3142                                         amdgpu_ring_write(ring, ext->extent[i]);
3143                         }
3144                 }
3145         }
3146
3147         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3148         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3149
3150         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3151         amdgpu_ring_write(ring, 0);
3152
3153         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3154         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3155         amdgpu_ring_write(ring, 0x8000);
3156         amdgpu_ring_write(ring, 0x8000);
3157
3158         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3159         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3160                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3161         amdgpu_ring_write(ring, tmp);
3162         amdgpu_ring_write(ring, 0);
3163
3164         amdgpu_ring_commit(ring);
3165
3166         return 0;
3167 }
3168
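/*
 * Program the gfx ring buffer registers (size, rptr/wptr addresses, base
 * and doorbell) and start the ring via gfx_v9_0_cp_gfx_start().
 */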
3169 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3170 {
3171         struct amdgpu_ring *ring;
3172         u32 tmp;
3173         u32 rb_bufsz;
3174         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3175
3176         /* Set the write pointer delay */
3177         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3178
3179         /* set the RB to use vmid 0 */
3180         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3181
3182         /* Set ring buffer size */
3183         ring = &adev->gfx.gfx_ring[0];
3184         rb_bufsz = order_base_2(ring->ring_size / 8);
3185         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3186         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3187 #ifdef __BIG_ENDIAN
3188         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3189 #endif
3190         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3191
3192         /* Initialize the ring buffer's write pointers */
3193         ring->wptr = 0;
3194         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3195         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3196
3197         /* set the wb address whether it's enabled or not */
3198         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3199         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3200         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3201
3202         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3203         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3204         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3205
3206         mdelay(1);
3207         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3208
3209         rb_addr = ring->gpu_addr >> 8;
3210         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3211         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3212
3213         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3214         if (ring->use_doorbell) {
3215                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3216                                     DOORBELL_OFFSET, ring->doorbell_index);
3217                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3218                                     DOORBELL_EN, 1);
3219         } else {
3220                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3221         }
3222         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3223
3224         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3225                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3226         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3227
3228         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3229                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3230
3232         /* start the ring */
3233         gfx_v9_0_cp_gfx_start(adev);
3234         ring->sched.ready = true;
3235
3236         return 0;
3237 }
3238
3239 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3240 {
3241         int i;
3242
3243         if (enable) {
3244                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3245         } else {
3246                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3247                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3248                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3249                         adev->gfx.compute_ring[i].sched.ready = false;
3250                 adev->gfx.kiq.ring.sched.ready = false;
3251         }
3252         udelay(50);
3253 }
3254
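/*
 * Legacy (non-PSP) MEC setup: halt the MEC, point the instruction cache at
 * the MEC firmware BO and load the MEC1 jump table.
 */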
3255 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3256 {
3257         const struct gfx_firmware_header_v1_0 *mec_hdr;
3258         const __le32 *fw_data;
3259         unsigned i;
3260         u32 tmp;
3261
3262         if (!adev->gfx.mec_fw)
3263                 return -EINVAL;
3264
3265         gfx_v9_0_cp_compute_enable(adev, false);
3266
3267         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3268         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3269
3270         fw_data = (const __le32 *)
3271                 (adev->gfx.mec_fw->data +
3272                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3273         tmp = 0;
3274         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3275         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3276         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3277
3278         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3279                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3280         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3281                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3282
3283         /* MEC1 */
3284         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3285                          mec_hdr->jt_offset);
3286         for (i = 0; i < mec_hdr->jt_size; i++)
3287                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3288                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3289
3290         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3291                         adev->gfx.mec_fw_version);
3292         /* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3293
3294         return 0;
3295 }
3296
3297 /* KIQ functions */
3298 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3299 {
3300         uint32_t tmp;
3301         struct amdgpu_device *adev = ring->adev;
3302
3303         /* tell RLC which is KIQ queue */
3304         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3305         tmp &= 0xffffff00;
3306         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3307         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3308         tmp |= 0x80;
3309         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3310 }
3311
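/*
 * Fill in the memory queue descriptor for a compute or KIQ ring: EOP
 * buffer, MQD/ring base addresses, doorbell and HQD control settings.
 */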
3312 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3313 {
3314         struct amdgpu_device *adev = ring->adev;
3315         struct v9_mqd *mqd = ring->mqd_ptr;
3316         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3317         uint32_t tmp;
3318
3319         mqd->header = 0xC0310800;
3320         mqd->compute_pipelinestat_enable = 0x00000001;
3321         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3322         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3323         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3324         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3325         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3326         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3327         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3328         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3329         mqd->compute_misc_reserved = 0x00000003;
3330
3331         mqd->dynamic_cu_mask_addr_lo =
3332                 lower_32_bits(ring->mqd_gpu_addr
3333                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3334         mqd->dynamic_cu_mask_addr_hi =
3335                 upper_32_bits(ring->mqd_gpu_addr
3336                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3337
3338         eop_base_addr = ring->eop_gpu_addr >> 8;
3339         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3340         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3341
3342         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3343         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3344         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3345                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3346
3347         mqd->cp_hqd_eop_control = tmp;
3348
3349         /* enable doorbell? */
3350         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3351
3352         if (ring->use_doorbell) {
3353                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3354                                     DOORBELL_OFFSET, ring->doorbell_index);
3355                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3356                                     DOORBELL_EN, 1);
3357                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3358                                     DOORBELL_SOURCE, 0);
3359                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3360                                     DOORBELL_HIT, 0);
3361         } else {
3362                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3363                                          DOORBELL_EN, 0);
3364         }
3365
3366         mqd->cp_hqd_pq_doorbell_control = tmp;
3367
3368         /* disable the queue if it's active */
3369         ring->wptr = 0;
3370         mqd->cp_hqd_dequeue_request = 0;
3371         mqd->cp_hqd_pq_rptr = 0;
3372         mqd->cp_hqd_pq_wptr_lo = 0;
3373         mqd->cp_hqd_pq_wptr_hi = 0;
3374
3375         /* set the pointer to the MQD */
3376         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3377         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3378
3379         /* set MQD vmid to 0 */
3380         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3381         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3382         mqd->cp_mqd_control = tmp;
3383
3384         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3385         hqd_gpu_addr = ring->gpu_addr >> 8;
3386         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3387         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3388
3389         /* set up the HQD, this is similar to CP_RB0_CNTL */
3390         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3391         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3392                             (order_base_2(ring->ring_size / 4) - 1));
3393         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3394                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3395 #ifdef __BIG_ENDIAN
3396         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3397 #endif
3398         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3399         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3400         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3401         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3402         mqd->cp_hqd_pq_control = tmp;
3403
3404         /* set the wb address whether it's enabled or not */
3405         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3406         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3407         mqd->cp_hqd_pq_rptr_report_addr_hi =
3408                 upper_32_bits(wb_gpu_addr) & 0xffff;
3409
3410         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3411         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3412         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3413         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3414
3415         tmp = 0;
3416         /* enable the doorbell if requested */
3417         if (ring->use_doorbell) {
3418                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3419                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3420                                 DOORBELL_OFFSET, ring->doorbell_index);
3421
3422                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3423                                          DOORBELL_EN, 1);
3424                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3425                                          DOORBELL_SOURCE, 0);
3426                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3427                                          DOORBELL_HIT, 0);
3428         }
3429
3430         mqd->cp_hqd_pq_doorbell_control = tmp;
3431
3432         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3433         ring->wptr = 0;
3434         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3435
3436         /* set the vmid for the queue */
3437         mqd->cp_hqd_vmid = 0;
3438
3439         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3440         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3441         mqd->cp_hqd_persistent_state = tmp;
3442
3443         /* set MIN_IB_AVAIL_SIZE */
3444         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3445         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3446         mqd->cp_hqd_ib_control = tmp;
3447
3448         /* the map_queues packet doesn't need to activate the queue,
3449          * so only the kiq needs to set this field.
3450          */
3451         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3452                 mqd->cp_hqd_active = 1;
3453
3454         return 0;
3455 }
3456
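/*
 * Program the HQD registers directly from the KIQ MQD and activate the
 * queue.
 */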
3457 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3458 {
3459         struct amdgpu_device *adev = ring->adev;
3460         struct v9_mqd *mqd = ring->mqd_ptr;
3461         int j;
3462
3463         /* disable wptr polling */
3464         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3465
3466         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3467                mqd->cp_hqd_eop_base_addr_lo);
3468         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3469                mqd->cp_hqd_eop_base_addr_hi);
3470
3471         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3472         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3473                mqd->cp_hqd_eop_control);
3474
3475         /* enable doorbell? */
3476         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3477                mqd->cp_hqd_pq_doorbell_control);
3478
3479         /* disable the queue if it's active */
3480         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3481                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3482                 for (j = 0; j < adev->usec_timeout; j++) {
3483                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3484                                 break;
3485                         udelay(1);
3486                 }
3487                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3488                        mqd->cp_hqd_dequeue_request);
3489                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3490                        mqd->cp_hqd_pq_rptr);
3491                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3492                        mqd->cp_hqd_pq_wptr_lo);
3493                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3494                        mqd->cp_hqd_pq_wptr_hi);
3495         }
3496
3497         /* set the pointer to the MQD */
3498         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3499                mqd->cp_mqd_base_addr_lo);
3500         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3501                mqd->cp_mqd_base_addr_hi);
3502
3503         /* set MQD vmid to 0 */
3504         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3505                mqd->cp_mqd_control);
3506
3507         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3508         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3509                mqd->cp_hqd_pq_base_lo);
3510         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3511                mqd->cp_hqd_pq_base_hi);
3512
3513         /* set up the HQD, this is similar to CP_RB0_CNTL */
3514         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3515                mqd->cp_hqd_pq_control);
3516
3517         /* set the wb address whether it's enabled or not */
3518         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3519                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3520         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3521                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3522
3523         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3524         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3525                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3526         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3527                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3528
3529         /* enable the doorbell if requested */
3530         if (ring->use_doorbell) {
3531                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3532                                         (adev->doorbell_index.kiq * 2) << 2);
3533                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3534                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3535         }
3536
3537         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3538                mqd->cp_hqd_pq_doorbell_control);
3539
3540         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3541         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3542                mqd->cp_hqd_pq_wptr_lo);
3543         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3544                mqd->cp_hqd_pq_wptr_hi);
3545
3546         /* set the vmid for the queue */
3547         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3548
3549         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3550                mqd->cp_hqd_persistent_state);
3551
3552         /* activate the queue */
3553         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3554                mqd->cp_hqd_active);
3555
3556         if (ring->use_doorbell)
3557                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3558
3559         return 0;
3560 }
3561
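/*
 * Tear down the KIQ HQD: request a dequeue, wait for the queue to go
 * inactive (forcing it inactive on timeout), then clear the HQD registers.
 * Like the init path, this runs with the queue selected under srbm_mutex.
 */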
3562 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3563 {
3564         struct amdgpu_device *adev = ring->adev;
3565         int j;
3566
3567         /* disable the queue if it's active */
3568         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3569
3570                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3571
3572                 for (j = 0; j < adev->usec_timeout; j++) {
3573                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3574                                 break;
3575                         udelay(1);
3576                 }
3577
3578                 if (j == adev->usec_timeout) {
3579                         DRM_DEBUG("KIQ dequeue request failed.\n");
3580
3581                         /* manually disable the queue if the dequeue request timed out */
3582                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3583                 }
3584
3585                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3586                       0);
3587         }
3588
3589         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3590         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3591         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3592         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3593         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3594         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3595         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3596         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3597
3598         return 0;
3599 }
3600
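/*
 * Initialize the KIQ: on a GPU reset the MQD is restored from the backup
 * copy and only the HQD registers are reprogrammed; on a first init the MQD
 * is built from scratch with gfx_v9_0_mqd_init() and then backed up.
 */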
3601 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3602 {
3603         struct amdgpu_device *adev = ring->adev;
3604         struct v9_mqd *mqd = ring->mqd_ptr;
3605         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3606
3607         gfx_v9_0_kiq_setting(ring);
3608
3609         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3610                 /* reset MQD to a clean status */
3611                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3612                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3613
3614                 /* reset ring buffer */
3615                 ring->wptr = 0;
3616                 amdgpu_ring_clear_ring(ring);
3617
3618                 mutex_lock(&adev->srbm_mutex);
3619                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3620                 gfx_v9_0_kiq_init_register(ring);
3621                 soc15_grbm_select(adev, 0, 0, 0, 0);
3622                 mutex_unlock(&adev->srbm_mutex);
3623         } else {
3624                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3625                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3626                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3627                 mutex_lock(&adev->srbm_mutex);
3628                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3629                 gfx_v9_0_mqd_init(ring);
3630                 gfx_v9_0_kiq_init_register(ring);
3631                 soc15_grbm_select(adev, 0, 0, 0, 0);
3632                 mutex_unlock(&adev->srbm_mutex);
3633
3634                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3635                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3636         }
3637
3638         return 0;
3639 }
3640
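/*
 * Initialize a compute queue's MQD. Unlike the KIQ, the HQD registers are
 * not written here; the queues are mapped later through the KIQ via
 * amdgpu_gfx_enable_kcq() in gfx_v9_0_kcq_resume().
 */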
3641 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3642 {
3643         struct amdgpu_device *adev = ring->adev;
3644         struct v9_mqd *mqd = ring->mqd_ptr;
3645         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3646
3647         if (!adev->in_gpu_reset && !adev->in_suspend) {
3648                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3649                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3650                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3651                 mutex_lock(&adev->srbm_mutex);
3652                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3653                 gfx_v9_0_mqd_init(ring);
3654                 soc15_grbm_select(adev, 0, 0, 0, 0);
3655                 mutex_unlock(&adev->srbm_mutex);
3656
3657                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3658                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3659         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3660                 /* reset MQD to a clean status */
3661                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3662                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3663
3664                 /* reset ring buffer */
3665                 ring->wptr = 0;
3666                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3667                 amdgpu_ring_clear_ring(ring);
3668         } else {
3669                 amdgpu_ring_clear_ring(ring);
3670         }
3671
3672         return 0;
3673 }
3674
3675 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3676 {
3677         struct amdgpu_ring *ring;
3678         int r;
3679
3680         ring = &adev->gfx.kiq.ring;
3681
3682         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3683         if (unlikely(r != 0))
3684                 return r;
3685
3686         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3687         if (unlikely(r != 0))
3688                 return r;
3689
3690         gfx_v9_0_kiq_init_queue(ring);
3691         amdgpu_bo_kunmap(ring->mqd_obj);
3692         ring->mqd_ptr = NULL;
3693         amdgpu_bo_unreserve(ring->mqd_obj);
3694         ring->sched.ready = true;
3695         return 0;
3696 }
3697
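/*
 * Bring up all compute queues: enable the compute MEs, initialize each
 * ring's MQD, then map the queues to HQDs through the KIQ.
 */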
3698 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3699 {
3700         struct amdgpu_ring *ring = NULL;
3701         int r = 0, i;
3702
3703         gfx_v9_0_cp_compute_enable(adev, true);
3704
3705         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3706                 ring = &adev->gfx.compute_ring[i];
3707
3708                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3709                 if (unlikely(r != 0))
3710                         goto done;
3711                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3712                 if (!r) {
3713                         r = gfx_v9_0_kcq_init_queue(ring);
3714                         amdgpu_bo_kunmap(ring->mqd_obj);
3715                         ring->mqd_ptr = NULL;
3716                 }
3717                 amdgpu_bo_unreserve(ring->mqd_obj);
3718                 if (r)
3719                         goto done;
3720         }
3721
3722         r = amdgpu_gfx_enable_kcq(adev);
3723 done:
3724         return r;
3725 }
3726
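/*
 * Full CP bring-up: load microcode when not using PSP, resume the KIQ first
 * (the compute queues are mapped through it), then the GFX ring and the
 * compute queues, and finally run the ring tests.
 */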
3727 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3728 {
3729         int r, i;
3730         struct amdgpu_ring *ring;
3731
3732         if (!(adev->flags & AMD_IS_APU))
3733                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3734
3735         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3736                 if (adev->asic_type != CHIP_ARCTURUS) {
3737                         /* legacy firmware loading */
3738                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3739                         if (r)
3740                                 return r;
3741                 }
3742
3743                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3744                 if (r)
3745                         return r;
3746         }
3747
3748         r = gfx_v9_0_kiq_resume(adev);
3749         if (r)
3750                 return r;
3751
3752         if (adev->asic_type != CHIP_ARCTURUS) {
3753                 r = gfx_v9_0_cp_gfx_resume(adev);
3754                 if (r)
3755                         return r;
3756         }
3757
3758         r = gfx_v9_0_kcq_resume(adev);
3759         if (r)
3760                 return r;
3761
3762         if (adev->asic_type != CHIP_ARCTURUS) {
3763                 ring = &adev->gfx.gfx_ring[0];
3764                 r = amdgpu_ring_test_helper(ring);
3765                 if (r)
3766                         return r;
3767         }
3768
3769         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3770                 ring = &adev->gfx.compute_ring[i];
3771                 amdgpu_ring_test_helper(ring);
3772         }
3773
3774         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3775
3776         return 0;
3777 }
3778
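/*
 * Propagate the data-fabric address hashing status (64K/2M/1G) into
 * TCP_ADDR_CONFIG so the TCP decodes addresses consistently with the DF.
 * Only applies to Arcturus.
 */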
3779 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3780 {
3781         u32 tmp;
3782
3783         if (adev->asic_type != CHIP_ARCTURUS)
3784                 return;
3785
3786         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3787         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3788                                 adev->df.hash_status.hash_64k);
3789         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3790                                 adev->df.hash_status.hash_2m);
3791         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3792                                 adev->df.hash_status.hash_1g);
3793         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3794 }
3795
3796 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3797 {
3798         if (adev->asic_type != CHIP_ARCTURUS)
3799                 gfx_v9_0_cp_gfx_enable(adev, enable);
3800         gfx_v9_0_cp_compute_enable(adev, enable);
3801 }
3802
3803 static int gfx_v9_0_hw_init(void *handle)
3804 {
3805         int r;
3806         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3807
3808         if (!amdgpu_sriov_vf(adev))
3809                 gfx_v9_0_init_golden_registers(adev);
3810
3811         gfx_v9_0_constants_init(adev);
3812
3813         gfx_v9_0_init_tcp_config(adev);
3814
3815         r = adev->gfx.rlc.funcs->resume(adev);
3816         if (r)
3817                 return r;
3818
3819         r = gfx_v9_0_cp_resume(adev);
3820         if (r)
3821                 return r;
3822
3823         return r;
3824 }
3825
3826 static int gfx_v9_0_hw_fini(void *handle)
3827 {
3828         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3829
3830         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3831         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3832         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3833
3834         /* DF freeze and KCQ disable would fail after a RAS interrupt has triggered */
3835         if (!amdgpu_ras_intr_triggered())
3836                 /* disable the KCQs so the CPC stops touching memory that is no longer valid */
3837                 amdgpu_gfx_disable_kcq(adev);
3838
3839         if (amdgpu_sriov_vf(adev)) {
3840                 gfx_v9_0_cp_gfx_enable(adev, false);
3841                 /* For SRIOV, wptr polling must be disabled once the hw side is
3842                  * finished; otherwise the CPC engine may keep fetching the WB
3843                  * address, which is no longer valid after the sw side has torn
3844                  * down, and trigger DMAR read errors on the hypervisor side.
3845                  */
3846                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3847                 return 0;
3848         }
3849
3850         /* Use the deinitialize sequence from CAIL when unbinding the device
3851          * from the driver, otherwise the KIQ hangs when binding it back.
3852          */
3853         if (!adev->in_gpu_reset && !adev->in_suspend) {
3854                 mutex_lock(&adev->srbm_mutex);
3855                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3856                                 adev->gfx.kiq.ring.pipe,
3857                                 adev->gfx.kiq.ring.queue, 0);
3858                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3859                 soc15_grbm_select(adev, 0, 0, 0, 0);
3860                 mutex_unlock(&adev->srbm_mutex);
3861         }
3862
3863         gfx_v9_0_cp_enable(adev, false);
3864         adev->gfx.rlc.funcs->stop(adev);
3865
3866         return 0;
3867 }
3868
3869 static int gfx_v9_0_suspend(void *handle)
3870 {
3871         return gfx_v9_0_hw_fini(handle);
3872 }
3873
3874 static int gfx_v9_0_resume(void *handle)
3875 {
3876         return gfx_v9_0_hw_init(handle);
3877 }
3878
3879 static bool gfx_v9_0_is_idle(void *handle)
3880 {
3881         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3882
3883         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3884                                 GRBM_STATUS, GUI_ACTIVE))
3885                 return false;
3886         else
3887                 return true;
3888 }
3889
3890 static int gfx_v9_0_wait_for_idle(void *handle)
3891 {
3892         unsigned i;
3893         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3894
3895         for (i = 0; i < adev->usec_timeout; i++) {
3896                 if (gfx_v9_0_is_idle(handle))
3897                         return 0;
3898                 udelay(1);
3899         }
3900         return -ETIMEDOUT;
3901 }
3902
3903 static int gfx_v9_0_soft_reset(void *handle)
3904 {
3905         u32 grbm_soft_reset = 0;
3906         u32 tmp;
3907         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3908
3909         /* GRBM_STATUS */
3910         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3911         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3912                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3913                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3914                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3915                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3916                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3917                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3918                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3919                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3920                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3921         }
3922
3923         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3924                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3925                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3926         }
3927
3928         /* GRBM_STATUS2 */
3929         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3930         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3931                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3932                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3933
3934
3935         if (grbm_soft_reset) {
3936                 /* stop the rlc */
3937                 adev->gfx.rlc.funcs->stop(adev);
3938
3939                 if (adev->asic_type != CHIP_ARCTURUS)
3940                         /* Disable GFX parsing/prefetching */
3941                         gfx_v9_0_cp_gfx_enable(adev, false);
3942
3943                 /* Disable MEC parsing/prefetching */
3944                 gfx_v9_0_cp_compute_enable(adev, false);
3945
3946                 if (grbm_soft_reset) {
3947                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3948                         tmp |= grbm_soft_reset;
3949                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3950                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3951                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3952
3953                         udelay(50);
3954
3955                         tmp &= ~grbm_soft_reset;
3956                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3957                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3958                 }
3959
3960                 /* Wait a little for things to settle down */
3961                 udelay(50);
3962         }
3963         return 0;
3964 }
3965
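/*
 * Read the 64-bit GPU clock counter with GFXOFF disabled. On Vega10 under
 * SRIOV runtime, the RLC reference-clock timestamp is read with an
 * MSB/LSB/MSB sequence and retried until the MSB is stable, to avoid a torn
 * read across the two 32-bit halves.
 */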
3966 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3967 {
3968         uint64_t clock;
3969
3970         amdgpu_gfx_off_ctrl(adev, false);
3971         mutex_lock(&adev->gfx.gpu_clock_mutex);
3972         if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
3973                 uint32_t tmp, lsb, msb, i = 0;
3974                 do {
3975                         if (i != 0)
3976                                 udelay(1);
3977                         tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3978                         lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
3979                         msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3980                         i++;
3981                 } while (unlikely(tmp != msb) && (i < adev->usec_timeout));
3982                 clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
3983         } else {
3984                 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3985                 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3986                         ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3987         }
3988         mutex_unlock(&adev->gfx.gpu_clock_mutex);
3989         amdgpu_gfx_off_ctrl(adev, true);
3990         return clock;
3991 }
3992
3993 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3994                                           uint32_t vmid,
3995                                           uint32_t gds_base, uint32_t gds_size,
3996                                           uint32_t gws_base, uint32_t gws_size,
3997                                           uint32_t oa_base, uint32_t oa_size)
3998 {
3999         struct amdgpu_device *adev = ring->adev;
4000
4001         /* GDS Base */
4002         gfx_v9_0_write_data_to_reg(ring, 0, false,
4003                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4004                                    gds_base);
4005
4006         /* GDS Size */
4007         gfx_v9_0_write_data_to_reg(ring, 0, false,
4008                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4009                                    gds_size);
4010
4011         /* GWS */
4012         gfx_v9_0_write_data_to_reg(ring, 0, false,
4013                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4014                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4015
4016         /* OA */
4017         gfx_v9_0_write_data_to_reg(ring, 0, false,
4018                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4019                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4020 }
4021
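/*
 * Pre-assembled GFX9 compute shaders used by gfx_v9_0_do_edc_gpr_workarounds()
 * below to initialize the VGPRs and SGPRs on every CU so the SEC/DED state
 * starts out clean.
 */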
4022 static const u32 vgpr_init_compute_shader[] =
4023 {
4024         0xb07c0000, 0xbe8000ff,
4025         0x000000f8, 0xbf110800,
4026         0x7e000280, 0x7e020280,
4027         0x7e040280, 0x7e060280,
4028         0x7e080280, 0x7e0a0280,
4029         0x7e0c0280, 0x7e0e0280,
4030         0x80808800, 0xbe803200,
4031         0xbf84fff5, 0xbf9c0000,
4032         0xd28c0001, 0x0001007f,
4033         0xd28d0001, 0x0002027e,
4034         0x10020288, 0xb8810904,
4035         0xb7814000, 0xd1196a01,
4036         0x00000301, 0xbe800087,
4037         0xbefc00c1, 0xd89c4000,
4038         0x00020201, 0xd89cc080,
4039         0x00040401, 0x320202ff,
4040         0x00000800, 0x80808100,
4041         0xbf84fff8, 0x7e020280,
4042         0xbf810000, 0x00000000,
4043 };
4044
4045 static const u32 sgpr_init_compute_shader[] =
4046 {
4047         0xb07c0000, 0xbe8000ff,
4048         0x0000005f, 0xbee50080,
4049         0xbe812c65, 0xbe822c65,
4050         0xbe832c65, 0xbe842c65,
4051         0xbe852c65, 0xb77c0005,
4052         0x80808500, 0xbf84fff8,
4053         0xbe800080, 0xbf810000,
4054 };
4055
4056 /* When the register arrays below are changed, please also update gpr_reg_size
4057   and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds
4058   so that all gfx9 ASICs stay covered */
4059 static const struct soc15_reg_entry vgpr_init_regs[] = {
4060    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4061    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4062    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4063    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4064    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4065    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4066    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4067    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4068    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4069    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4070    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4071    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4072    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4073    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4074 };
4075
4076 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4077    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4078    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4079    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4080    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4081    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4082    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4083    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4084    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4085    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4086    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4087    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4088    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4089    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4090    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4091 };
4092
4093 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4094    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4095    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4096    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4097    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4098    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4099    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4100    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4101    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4102    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4103    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4104    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4105    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4106    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4107    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4108 };
4109
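/*
 * EDC/ECC error counter registers collected by the RAS code; the trailing
 * values give the SE and instance counts over which each counter is
 * iterated when it is read back.
 */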
4110 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4111    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4112    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4113    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4114    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4115    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4116    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4117    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4118    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4119    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4120    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4121    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4122    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4123    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4124    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4125    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4126    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4127    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4128    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4129    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4130    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4131    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4132    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4133    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4134    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4135    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4136    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4137    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4138    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4139    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4140    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4141    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4142    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4143    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4144    { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1},
4145 };
4146
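/*
 * GDS EDC workaround: issue a CPDMA write that sweeps the whole GDS space
 * on the first compute ring so the GDS ECC state starts out clean. Only
 * runs when GFX RAS is supported.
 */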
4147 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4148 {
4149         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4150         int i, r;
4151
4152         /* only supported when RAS is enabled */
4153         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4154                 return 0;
4155
4156         r = amdgpu_ring_alloc(ring, 7);
4157         if (r) {
4158                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4159                         ring->name, r);
4160                 return r;
4161         }
4162
4163         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4164         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4165
4166         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4167         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4168                                 PACKET3_DMA_DATA_DST_SEL(1) |
4169                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4170                                 PACKET3_DMA_DATA_ENGINE(0)));
4171         amdgpu_ring_write(ring, 0);
4172         amdgpu_ring_write(ring, 0);
4173         amdgpu_ring_write(ring, 0);
4174         amdgpu_ring_write(ring, 0);
4175         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4176                                 adev->gds.gds_size);
4177
4178         amdgpu_ring_commit(ring);
4179
4180         for (i = 0; i < adev->usec_timeout; i++) {
4181                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4182                         break;
4183                 udelay(1);
4184         }
4185
4186         if (i >= adev->usec_timeout)
4187                 r = -ETIMEDOUT;
4188
4189         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4190
4191         return r;
4192 }
4193
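/*
 * GPR EDC workaround: build an indirect buffer that dispatches the VGPR and
 * SGPR init shaders across every CU so all GPRs are written once, then
 * clear the RAS EDC counters. Only runs when GFX RAS is supported and the
 * compute ring is ready.
 */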
4194 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4195 {
4196         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4197         struct amdgpu_ib ib;
4198         struct dma_fence *f = NULL;
4199         int r, i;
4200         unsigned total_size, vgpr_offset, sgpr_offset;
4201         u64 gpu_addr;
4202
4203         int compute_dim_x = adev->gfx.config.max_shader_engines *
4204                                                 adev->gfx.config.max_cu_per_sh *
4205                                                 adev->gfx.config.max_sh_per_se;
4206         int sgpr_work_group_size = 5;
4207         int gpr_reg_size = compute_dim_x / 16 + 6;
4208
4209         /* only supported when RAS is enabled */
4210         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4211                 return 0;
4212
4213         /* bail if the compute ring is not ready */
4214         if (!ring->sched.ready)
4215                 return 0;
4216
4217         total_size =
4218                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4219         total_size +=
4220                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4221         total_size +=
4222                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4223         total_size = ALIGN(total_size, 256);
4224         vgpr_offset = total_size;
4225         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4226         sgpr_offset = total_size;
4227         total_size += sizeof(sgpr_init_compute_shader);
4228
4229         /* allocate an indirect buffer to put the commands in */
4230         memset(&ib, 0, sizeof(ib));
4231         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4232         if (r) {
4233                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4234                 return r;
4235         }
4236
4237         /* load the compute shaders */
4238         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4239                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4240
4241         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4242                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4243
4244         /* init the ib length to 0 */
4245         ib.length_dw = 0;
4246
4247         /* VGPR */
4248         /* write the register state for the compute dispatch */
4249         for (i = 0; i < gpr_reg_size; i++) {
4250                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4251                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4252                                                                 - PACKET3_SET_SH_REG_START;
4253                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4254         }
4255         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4256         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4257         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4258         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4259                                                         - PACKET3_SET_SH_REG_START;
4260         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4261         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4262
4263         /* write dispatch packet */
4264         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4265         ib.ptr[ib.length_dw++] = compute_dim_x; /* x */
4266         ib.ptr[ib.length_dw++] = 1; /* y */
4267         ib.ptr[ib.length_dw++] = 1; /* z */
4268         ib.ptr[ib.length_dw++] =
4269                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4270
4271         /* write CS partial flush packet */
4272         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4273         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4274
4275         /* SGPR1 */
4276         /* write the register state for the compute dispatch */
4277         for (i = 0; i < gpr_reg_size; i++) {
4278                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4279                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4280                                                                 - PACKET3_SET_SH_REG_START;
4281                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4282         }
4283         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4284         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4285         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4286         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4287                                                         - PACKET3_SET_SH_REG_START;
4288         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4289         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4290
4291         /* write dispatch packet */
4292         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4293         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4294         ib.ptr[ib.length_dw++] = 1; /* y */
4295         ib.ptr[ib.length_dw++] = 1; /* z */
4296         ib.ptr[ib.length_dw++] =
4297                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4298
4299         /* write CS partial flush packet */
4300         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4301         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4302
4303         /* SGPR2 */
4304         /* write the register state for the compute dispatch */
4305         for (i = 0; i < gpr_reg_size; i++) {
4306                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4307                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4308                                                                 - PACKET3_SET_SH_REG_START;
4309                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4310         }
4311         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4312         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4313         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4314         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4315                                                         - PACKET3_SET_SH_REG_START;
4316         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4317         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4318
4319         /* write dispatch packet */
4320         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4321         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4322         ib.ptr[ib.length_dw++] = 1; /* y */
4323         ib.ptr[ib.length_dw++] = 1; /* z */
4324         ib.ptr[ib.length_dw++] =
4325                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4326
4327         /* write CS partial flush packet */
4328         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4329         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4330
4331         /* schedule the ib on the ring */
4332         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4333         if (r) {
4334                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4335                 goto fail;
4336         }
4337
4338         /* wait for the GPU to finish processing the IB */
4339         r = dma_fence_wait(f, false);
4340         if (r) {
4341                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4342                 goto fail;
4343         }
4344
4345         switch (adev->asic_type) {
4347         case CHIP_VEGA20:
4348                 gfx_v9_0_clear_ras_edc_counter(adev);
4349                 break;
4350         case CHIP_ARCTURUS:
4351                 gfx_v9_4_clear_ras_edc_counter(adev);
4352                 break;
4353         default:
4354                 break;
4355         }
4356
4357 fail:
4358         amdgpu_ib_free(adev, &ib, NULL);
4359         dma_fence_put(f);
4360
4361         return r;
4362 }
4363
4364 static int gfx_v9_0_early_init(void *handle)
4365 {
4366         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4367
4368         if (adev->asic_type == CHIP_ARCTURUS)
4369                 adev->gfx.num_gfx_rings = 0;
4370         else
4371                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4372         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4373         gfx_v9_0_set_kiq_pm4_funcs(adev);
4374         gfx_v9_0_set_ring_funcs(adev);
4375         gfx_v9_0_set_irq_funcs(adev);
4376         gfx_v9_0_set_gds_init(adev);
4377         gfx_v9_0_set_rlc_funcs(adev);
4378
4379         return 0;
4380 }
4381
4382 static int gfx_v9_0_ecc_late_init(void *handle)
4383 {
4384         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4385         int r;
4386
4387         /*
4388          * Temporary workaround: on several cards the CP firmware fails to
4389          * update the read pointer when CPDMA writes the GDS clearing
4390          * operation during the suspend/resume sequence, so limit this
4391          * operation to the cold boot sequence.
4392          */
4393         if (!adev->in_suspend) {
4394                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4395                 if (r)
4396                         return r;
4397         }
4398
4399         /* requires IBs so do in late init after IB pool is initialized */
4400         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4401         if (r)
4402                 return r;
4403
4404         r = amdgpu_gfx_ras_late_init(adev);
4405         if (r)
4406                 return r;
4407
4408         return 0;
4409 }
4410
4411 static int gfx_v9_0_late_init(void *handle)
4412 {
4413         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4414         int r;
4415
4416         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4417         if (r)
4418                 return r;
4419
4420         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4421         if (r)
4422                 return r;
4423
4424         r = gfx_v9_0_ecc_late_init(handle);
4425         if (r)
4426                 return r;
4427
4428         return 0;
4429 }
4430
4431 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4432 {
4433         uint32_t rlc_setting;
4434
4435         /* if RLC is not enabled, do nothing */
4436         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4437         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4438                 return false;
4439
4440         return true;
4441 }
4442
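/*
 * Request RLC safe mode by writing the CMD/MESSAGE fields of RLC_SAFE_MODE,
 * then poll until the RLC acknowledges the request by clearing CMD.
 */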
4443 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4444 {
4445         uint32_t data;
4446         unsigned i;
4447
4448         data = RLC_SAFE_MODE__CMD_MASK;
4449         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4450         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4451
4452         /* wait for RLC_SAFE_MODE */
4453         for (i = 0; i < adev->usec_timeout; i++) {
4454                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4455                         break;
4456                 udelay(1);
4457         }
4458 }
4459
4460 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4461 {
4462         uint32_t data;
4463
4464         data = RLC_SAFE_MODE__CMD_MASK;
4465         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4466 }
4467
4468 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4469                                                 bool enable)
4470 {
4471         amdgpu_gfx_rlc_enter_safe_mode(adev);
4472
4473         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4474                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4475                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4476                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4477         } else {
4478                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4479                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4480                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4481         }
4482
4483         amdgpu_gfx_rlc_exit_safe_mode(adev);
4484 }
4485
4486 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4487                                                 bool enable)
4488 {
4489         /* TODO: double check whether this needs to run under RLC safe mode */
4490         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4491
4492         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4493                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4494         else
4495                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4496
4497         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4498                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4499         else
4500                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4501
4502         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4503 }
4504
4505 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4506                                                       bool enable)
4507 {
4508         uint32_t data, def;
4509
4510         amdgpu_gfx_rlc_enter_safe_mode(adev);
4511
4512         /* It is disabled by HW by default */
4513         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4514                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4515                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4516
4517                 if (adev->asic_type != CHIP_VEGA12)
4518                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4519
4520                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4521                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4522                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4523
4524                 /* only for Vega10 & Raven1 */
4525                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4526
4527                 if (def != data)
4528                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4529
4530                 /* MGLS is a global flag to control all MGLS in GFX */
4531                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4532                         /* 2 - RLC memory Light sleep */
4533                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4534                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4535                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4536                                 if (def != data)
4537                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4538                         }
4539                         /* 3 - CP memory Light sleep */
4540                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4541                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4542                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4543                                 if (def != data)
4544                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4545                         }
4546                 }
4547         } else {
4548                 /* 1 - MGCG_OVERRIDE */
4549                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4550
4551                 if (adev->asic_type != CHIP_VEGA12)
4552                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4553
4554                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4555                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4556                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4557                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4558
4559                 if (def != data)
4560                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4561
4562                 /* 2 - disable MGLS in RLC */
4563                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4564                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4565                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4566                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4567                 }
4568
4569                 /* 3 - disable MGLS in CP */
4570                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4571                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4572                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4573                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4574                 }
4575         }
4576
4577         amdgpu_gfx_rlc_exit_safe_mode(adev);
4578 }
4579
4580 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4581                                            bool enable)
4582 {
4583         uint32_t data, def;
4584
4585         if (adev->asic_type == CHIP_ARCTURUS)
4586                 return;
4587
4588         amdgpu_gfx_rlc_enter_safe_mode(adev);
4589
4590         /* Enable 3D CGCG/CGLS */
4591         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4592                 /* write cmd to clear cgcg/cgls ov */
4593                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4594                 /* unset CGCG override */
4595                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4596                 /* update CGCG and CGLS override bits */
4597                 if (def != data)
4598                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4599
4600                 /* enable the 3D CGCG FSM (0x0000363f) */
4601                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4602
4603                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4604                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4605                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4606                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4607                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4608                 if (def != data)
4609                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4610
4611                 /* set IDLE_POLL_COUNT(0x00900100) */
4612                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4613                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4614                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4615                 if (def != data)
4616                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4617         } else {
4618                 /* Disable CGCG/CGLS */
4619                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4620                 /* disable cgcg, cgls should be disabled */
4621                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4622                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4623                 /* disable cgcg and cgls in FSM */
4624                 if (def != data)
4625                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4626         }
4627
4628         amdgpu_gfx_rlc_exit_safe_mode(adev);
4629 }
4630
4631 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4632                                                       bool enable)
4633 {
4634         uint32_t def, data;
4635
4636         amdgpu_gfx_rlc_enter_safe_mode(adev);
4637
4638         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4639                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4640                 /* unset CGCG override */
4641                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4642                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4643                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4644                 else
4645                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4646                 /* update CGCG and CGLS override bits */
4647                 if (def != data)
4648                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4649
4650                 /* enable the CGCG FSM (0x0000363F) */
4651                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4652
4653                 if (adev->asic_type == CHIP_ARCTURUS)
4654                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4655                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4656                 else
4657                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4658                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4659                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4660                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4661                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4662                 if (def != data)
4663                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4664
4665                 /* set IDLE_POLL_COUNT(0x00900100) */
4666                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4667                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4668                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4669                 if (def != data)
4670                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4671         } else {
4672                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4673                 /* reset CGCG/CGLS bits */
4674                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4675                 /* disable cgcg and cgls in FSM */
4676                 if (def != data)
4677                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4678         }
4679
4680         amdgpu_gfx_rlc_exit_safe_mode(adev);
4681 }
4682
4683 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4684                                             bool enable)
4685 {
4686         if (enable) {
4687                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4688                  * ===  MGCG + MGLS ===
4689                  */
4690                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4691                 /* ===  CGCG /CGLS for GFX 3D Only === */
4692                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4693                 /* ===  CGCG + CGLS === */
4694                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4695         } else {
4696                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4697                  * ===  CGCG + CGLS ===
4698                  */
4699                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4700                 /* ===  CGCG /CGLS for GFX 3D Only === */
4701                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4702                 /* ===  MGCG + MGLS === */
4703                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4704         }
4705         return 0;
4706 }
4707
4708 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4709         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4710         .set_safe_mode = gfx_v9_0_set_safe_mode,
4711         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4712         .init = gfx_v9_0_rlc_init,
4713         .get_csb_size = gfx_v9_0_get_csb_size,
4714         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4715         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4716         .resume = gfx_v9_0_rlc_resume,
4717         .stop = gfx_v9_0_rlc_stop,
4718         .reset = gfx_v9_0_rlc_reset,
4719         .start = gfx_v9_0_rlc_start
4720 };
4721
4722 static int gfx_v9_0_set_powergating_state(void *handle,
4723                                           enum amd_powergating_state state)
4724 {
4725         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4726         bool enable = (state == AMD_PG_STATE_GATE);
4727
4728         switch (adev->asic_type) {
4729         case CHIP_RAVEN:
4730         case CHIP_RENOIR:
4731                 if (!enable) {
4732                         amdgpu_gfx_off_ctrl(adev, false);
4733                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4734                 }
4735                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4736                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4737                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4738                 } else {
4739                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4740                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4741                 }
4742
4743                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4744                         gfx_v9_0_enable_cp_power_gating(adev, true);
4745                 else
4746                         gfx_v9_0_enable_cp_power_gating(adev, false);
4747
4748                 /* update gfx cgpg state */
4749                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4750
4751                 /* update mgcg state */
4752                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4753
4754                 if (enable)
4755                         amdgpu_gfx_off_ctrl(adev, true);
4756                 break;
4757         case CHIP_VEGA12:
4758                 if (!enable) {
4759                         amdgpu_gfx_off_ctrl(adev, false);
4760                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4761                 } else {
4762                         amdgpu_gfx_off_ctrl(adev, true);
4763                 }
4764                 break;
4765         default:
4766                 break;
4767         }
4768
4769         return 0;
4770 }
4771
4772 static int gfx_v9_0_set_clockgating_state(void *handle,
4773                                           enum amd_clockgating_state state)
4774 {
4775         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4776
4777         if (amdgpu_sriov_vf(adev))
4778                 return 0;
4779
4780         switch (adev->asic_type) {
4781         case CHIP_VEGA10:
4782         case CHIP_VEGA12:
4783         case CHIP_VEGA20:
4784         case CHIP_RAVEN:
4785         case CHIP_ARCTURUS:
4786         case CHIP_RENOIR:
4787                 gfx_v9_0_update_gfx_clock_gating(adev,
4788                                                  state == AMD_CG_STATE_GATE);
4789                 break;
4790         default:
4791                 break;
4792         }
4793         return 0;
4794 }
4795
4796 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4797 {
4798         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4799         int data;
4800
4801         if (amdgpu_sriov_vf(adev))
4802                 *flags = 0;
4803
4804         /* AMD_CG_SUPPORT_GFX_MGCG */
4805         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
4806         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4807                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4808
4809         /* AMD_CG_SUPPORT_GFX_CGCG */
4810         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
4811         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4812                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4813
4814         /* AMD_CG_SUPPORT_GFX_CGLS */
4815         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4816                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4817
4818         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4819         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
4820         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4821                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4822
4823         /* AMD_CG_SUPPORT_GFX_CP_LS */
4824         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
4825         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4826                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4827
4828         if (adev->asic_type != CHIP_ARCTURUS) {
4829                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4830                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
4831                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4832                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4833
4834                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4835                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4836                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4837         }
4838 }
4839
4840 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4841 {
4842         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4843 }
4844
4845 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4846 {
4847         struct amdgpu_device *adev = ring->adev;
4848         u64 wptr;
4849
4850         /* XXX check if swapping is necessary on BE */
4851         if (ring->use_doorbell) {
4852                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4853         } else {
4854                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4855                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4856         }
4857
4858         return wptr;
4859 }
4860
4861 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4862 {
4863         struct amdgpu_device *adev = ring->adev;
4864
4865         if (ring->use_doorbell) {
4866                 /* XXX check if swapping is necessary on BE */
4867                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4868                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4869         } else {
4870                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4871                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4872         }
4873 }
4874
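/*
 * Emit an HDP flush on this ring: the CP writes the engine-specific
 * ref/mask value to the NBIO HDP flush request register and then polls
 * the done register until the corresponding bits are set.
 */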
4875 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4876 {
4877         struct amdgpu_device *adev = ring->adev;
4878         u32 ref_and_mask, reg_mem_engine;
4879         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4880
4881         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4882                 switch (ring->me) {
4883                 case 1:
4884                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4885                         break;
4886                 case 2:
4887                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4888                         break;
4889                 default:
4890                         return;
4891                 }
4892                 reg_mem_engine = 0;
4893         } else {
4894                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4895                 reg_mem_engine = 1; /* pfp */
4896         }
4897
4898         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4899                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4900                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4901                               ref_and_mask, ref_and_mask, 0x20);
4902 }
4903
4904 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4905                                         struct amdgpu_job *job,
4906                                         struct amdgpu_ib *ib,
4907                                         uint32_t flags)
4908 {
4909         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4910         u32 header, control = 0;
4911
4912         if (ib->flags & AMDGPU_IB_FLAG_CE)
4913                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4914         else
4915                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4916
4917         control |= ib->length_dw | (vmid << 24);
4918
4919         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4920                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4921
4922                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4923                         gfx_v9_0_ring_emit_de_meta(ring);
4924         }
4925
4926         amdgpu_ring_write(ring, header);
4927         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4928         amdgpu_ring_write(ring,
4929 #ifdef __BIG_ENDIAN
4930                 (2 << 0) |
4931 #endif
4932                 lower_32_bits(ib->gpu_addr));
4933         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4934         amdgpu_ring_write(ring, control);
4935 }
4936
4937 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4938                                           struct amdgpu_job *job,
4939                                           struct amdgpu_ib *ib,
4940                                           uint32_t flags)
4941 {
4942         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4943         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4944
4945         /* Currently, there is a high probability of getting a wave ID mismatch
4946          * between ME and GDS, leading to a hw deadlock, because ME generates
4947          * different wave IDs than the GDS expects. This situation happens
4948          * randomly when at least 5 compute pipes use GDS ordered append.
4949          * The wave IDs generated by ME are also wrong after suspend/resume.
4950          * Those are probably bugs somewhere else in the kernel driver.
4951          *
4952          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4953          * GDS to 0 for this ring (me/pipe).
4954          */
4955         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4956                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4957                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4958                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4959         }
4960
4961         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4962         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4963         amdgpu_ring_write(ring,
4964 #ifdef __BIG_ENDIAN
4965                                 (2 << 0) |
4966 #endif
4967                                 lower_32_bits(ib->gpu_addr));
4968         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4969         amdgpu_ring_write(ring, control);
4970 }
4971
4972 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4973                                      u64 seq, unsigned flags)
4974 {
4975         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4976         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4977         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4978
4979         /* RELEASE_MEM - flush caches, send int */
4980         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4981         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4982                                                EOP_TC_NC_ACTION_EN) :
4983                                               (EOP_TCL1_ACTION_EN |
4984                                                EOP_TC_ACTION_EN |
4985                                                EOP_TC_WB_ACTION_EN |
4986                                                EOP_TC_MD_ACTION_EN)) |
4987                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4988                                  EVENT_INDEX(5)));
4989         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4990
4991         /*
4992          * The address must be Qword aligned for a 64-bit write, or Dword
4993          * aligned if only the low 32 bits are sent (the high bits are discarded).
4994          */
4995         if (write64bit)
4996                 BUG_ON(addr & 0x7);
4997         else
4998                 BUG_ON(addr & 0x3);
4999         amdgpu_ring_write(ring, lower_32_bits(addr));
5000         amdgpu_ring_write(ring, upper_32_bits(addr));
5001         amdgpu_ring_write(ring, lower_32_bits(seq));
5002         amdgpu_ring_write(ring, upper_32_bits(seq));
5003         amdgpu_ring_write(ring, 0);
5004 }
5005
5006 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5007 {
5008         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5009         uint32_t seq = ring->fence_drv.sync_seq;
5010         uint64_t addr = ring->fence_drv.gpu_addr;
5011
5012         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5013                               lower_32_bits(addr), upper_32_bits(addr),
5014                               seq, 0xffffffff, 4);
5015 }
5016
5017 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5018                                         unsigned vmid, uint64_t pd_addr)
5019 {
5020         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5021
5022         /* compute doesn't have PFP */
5023         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5024                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5025                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5026                 amdgpu_ring_write(ring, 0x0);
5027         }
5028 }
5029
5030 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5031 {
5032         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5033 }
5034
5035 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5036 {
5037         u64 wptr;
5038
5039         /* XXX check if swapping is necessary on BE */
5040         if (ring->use_doorbell)
5041                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5042         else
5043                 BUG();
5044         return wptr;
5045 }
5046
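/*
 * Program SPI_WCL_PIPE_PERCENT for this ring's pipe: full allocation when
 * acquire is true, the minimum value otherwise.
 */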
5047 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5048                                            bool acquire)
5049 {
5050         struct amdgpu_device *adev = ring->adev;
5051         int pipe_num, tmp, reg;
5052         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5053
5054         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5055
5056         /* first me only has 2 entries, GFX and HP3D */
5057         if (ring->me > 0)
5058                 pipe_num -= 2;
5059
5060         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5061         tmp = RREG32(reg);
5062         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5063         WREG32(reg, tmp);
5064 }
5065
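/*
 * Track per-pipe reservations in pipe_reserve_bitmap. While at least one
 * pipe holds a reservation, every pipe without one is throttled to the
 * minimum SPI_WCL_PIPE_PERCENT; when the last reservation is released,
 * all gfx and compute pipes get their full allocation back.
 */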
5066 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5067                                             struct amdgpu_ring *ring,
5068                                             bool acquire)
5069 {
5070         int i, pipe;
5071         bool reserve;
5072         struct amdgpu_ring *iring;
5073
5074         mutex_lock(&adev->gfx.pipe_reserve_mutex);
5075         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5076         if (acquire)
5077                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5078         else
5079                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5080
5081         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5082                 /* Clear all reservations - everyone reacquires all resources */
5083                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5084                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5085                                                        true);
5086
5087                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5088                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5089                                                        true);
5090         } else {
5091                 /* Lower all pipes without a current reservation */
5092                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5093                         iring = &adev->gfx.gfx_ring[i];
5094                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5095                                                            iring->me,
5096                                                            iring->pipe,
5097                                                            0);
5098                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5099                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5100                 }
5101
5102                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5103                         iring = &adev->gfx.compute_ring[i];
5104                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5105                                                            iring->me,
5106                                                            iring->pipe,
5107                                                            0);
5108                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5109                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5110                 }
5111         }
5112
5113         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5114 }
5115
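/*
 * Select this ring's queue via SRBM and program the HQD pipe/queue
 * priority registers: high priority on acquire, back to 0 on release.
 */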
5116 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5117                                       struct amdgpu_ring *ring,
5118                                       bool acquire)
5119 {
5120         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5121         uint32_t queue_priority = acquire ? 0xf : 0x0;
5122
5123         mutex_lock(&adev->srbm_mutex);
5124         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5125
5126         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5127         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5128
5129         soc15_grbm_select(adev, 0, 0, 0, 0);
5130         mutex_unlock(&adev->srbm_mutex);
5131 }
5132
5133 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5134                                                enum drm_sched_priority priority)
5135 {
5136         struct amdgpu_device *adev = ring->adev;
5137         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5138
5139         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5140                 return;
5141
5142         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5143         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5144 }
5145
5146 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5147 {
5148         struct amdgpu_device *adev = ring->adev;
5149
5150         /* XXX check if swapping is necessary on BE */
5151         if (ring->use_doorbell) {
5152                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5153                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5154         } else {
5155                 BUG(); /* only DOORBELL method supported on gfx9 now */
5156         }
5157 }
5158
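/*
 * KIQ fences only use a 32-bit writeback slot, so the sequence number is
 * written with WRITE_DATA; if an interrupt is requested, a second
 * WRITE_DATA pokes CPC_INT_STATUS to raise it.
 */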
5159 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5160                                          u64 seq, unsigned int flags)
5161 {
5162         struct amdgpu_device *adev = ring->adev;
5163
5164         /* we only allocate 32bit for each seq wb address */
5165         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5166
5167         /* write fence seq to the "addr" */
5168         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5169         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5170                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5171         amdgpu_ring_write(ring, lower_32_bits(addr));
5172         amdgpu_ring_write(ring, upper_32_bits(addr));
5173         amdgpu_ring_write(ring, lower_32_bits(seq));
5174
5175         if (flags & AMDGPU_FENCE_FLAG_INT) {
5176                 /* set register to trigger INT */
5177                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5178                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5179                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5180                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5181                 amdgpu_ring_write(ring, 0);
5182                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5183         }
5184 }
5185
5186 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5187 {
5188         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5189         amdgpu_ring_write(ring, 0);
5190 }
5191
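/*
 * Write a zero-initialized CE metadata payload into the per-context save
 * area (CSA) with WRITE_DATA; emitted from the context-control preamble
 * when running under SR-IOV.
 */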
5192 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5193 {
5194         struct v9_ce_ib_state ce_payload = {0};
5195         uint64_t csa_addr;
5196         int cnt;
5197
5198         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5199         csa_addr = amdgpu_csa_vaddr(ring->adev);
5200
5201         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5202         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5203                                  WRITE_DATA_DST_SEL(8) |
5204                                  WR_CONFIRM) |
5205                                  WRITE_DATA_CACHE_POLICY(0));
5206         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5207         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5208         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5209 }
5210
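/*
 * Write a zero-initialized DE metadata payload into the CSA, with the GDS
 * backup address pointing 4KB past the start of the CSA; emitted ahead of
 * a preemptible gfx IB under SR-IOV.
 */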
5211 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5212 {
5213         struct v9_de_ib_state de_payload = {0};
5214         uint64_t csa_addr, gds_addr;
5215         int cnt;
5216
5217         csa_addr = amdgpu_csa_vaddr(ring->adev);
5218         gds_addr = csa_addr + 4096;
5219         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5220         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5221
5222         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5223         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5224         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5225                                  WRITE_DATA_DST_SEL(8) |
5226                                  WR_CONFIRM) |
5227                                  WRITE_DATA_CACHE_POLICY(0));
5228         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5229         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5230         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5231 }
5232
5233 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5234 {
5235         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5236         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0: frame begin, 1: frame end */
5237 }
5238
5239 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5240 {
5241         uint32_t dw2 = 0;
5242
5243         if (amdgpu_sriov_vf(ring->adev))
5244                 gfx_v9_0_ring_emit_ce_meta(ring);
5245
5246         gfx_v9_0_ring_emit_tmz(ring, true);
5247
5248         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5249         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5250                 /* set load_global_config & load_global_uconfig */
5251                 dw2 |= 0x8001;
5252                 /* set load_cs_sh_regs */
5253                 dw2 |= 0x01000000;
5254                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5255                 dw2 |= 0x10002;
5256
5257                 /* set load_ce_ram if a preamble is present */
5258                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5259                         dw2 |= 0x10000000;
5260         } else {
5261                 /* still load_ce_ram if this is the first time a preamble is
5262                  * presented, even though no context switch happens.
5263                  */
5264                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5265                         dw2 |= 0x10000000;
5266         }
5267
5268         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5269         amdgpu_ring_write(ring, dw2);
5270         amdgpu_ring_write(ring, 0);
5271 }
5272
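/*
 * Emit a COND_EXEC packet: if the dword at cond_exe_gpu_addr reads 0, the
 * CP skips the number of dwords given in the final packet word. That count
 * is not known yet, so a 0x55aa55aa placeholder is written and its ring
 * offset returned for gfx_v9_0_ring_emit_patch_cond_exec() to fix up.
 */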
5273 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5274 {
5275         unsigned ret;

5276         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5277         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5278         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5279         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr==0 */
5280         ret = ring->wptr & ring->buf_mask;
5281         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5282         return ret;
5283 }
5284
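/*
 * Patch the placeholder written by gfx_v9_0_ring_emit_init_cond_exec()
 * with the actual number of dwords between the COND_EXEC packet and the
 * current write pointer, accounting for ring buffer wrap-around.
 */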
5285 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5286 {
5287         unsigned cur;

5288         BUG_ON(offset > ring->buf_mask);
5289         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5290
5291         cur = (ring->wptr & ring->buf_mask) - 1;
5292         if (likely(cur > offset))
5293                 ring->ring[offset] = cur - offset;
5294         else
5295                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5296 }
5297
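/*
 * Emit a COPY_DATA packet that copies a register value into the KIQ's
 * reg_val_offs slot of the writeback buffer, where the CPU can read it.
 */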
5298 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5299 {
5300         struct amdgpu_device *adev = ring->adev;
5301         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5302
5303         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5304         amdgpu_ring_write(ring, 0 |     /* src: register */
5305                                 (5 << 8) |      /* dst: memory */
5306                                 (1 << 20));     /* write confirm */
5307         amdgpu_ring_write(ring, reg);
5308         amdgpu_ring_write(ring, 0);
5309         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5310                                 kiq->reg_val_offs * 4));
5311         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5312                                 kiq->reg_val_offs * 4));
5313 }
5314
5315 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5316                                     uint32_t val)
5317 {
5318         uint32_t cmd = 0;
5319
5320         switch (ring->funcs->type) {
5321         case AMDGPU_RING_TYPE_GFX:
5322                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5323                 break;
5324         case AMDGPU_RING_TYPE_KIQ:
5325                 cmd = (1 << 16); /* no inc addr */
5326                 break;
5327         default:
5328                 cmd = WR_CONFIRM;
5329                 break;
5330         }
5331         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5332         amdgpu_ring_write(ring, cmd);
5333         amdgpu_ring_write(ring, reg);
5334         amdgpu_ring_write(ring, 0);
5335         amdgpu_ring_write(ring, val);
5336 }
5337
5338 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5339                                         uint32_t val, uint32_t mask)
5340 {
5341         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5342 }
5343
5344 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5345                                                   uint32_t reg0, uint32_t reg1,
5346                                                   uint32_t ref, uint32_t mask)
5347 {
5348         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5349         struct amdgpu_device *adev = ring->adev;
5350         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5351                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5352
5353         if (fw_version_ok)
5354                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5355                                       ref, mask, 0x20);
5356         else
5357                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5358                                                            ref, mask);
5359 }
5360
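/*
 * Soft recovery for a hung ring: issue an SQ_CMD that kills all waves
 * belonging to the given VMID.
 */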
5361 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5362 {
5363         struct amdgpu_device *adev = ring->adev;
5364         uint32_t value = 0;
5365
5366         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5367         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5368         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5369         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5370         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5371 }
5372
5373 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5374                                                  enum amdgpu_interrupt_state state)
5375 {
5376         switch (state) {
5377         case AMDGPU_IRQ_STATE_DISABLE:
5378         case AMDGPU_IRQ_STATE_ENABLE:
5379                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5380                                TIME_STAMP_INT_ENABLE,
5381                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5382                 break;
5383         default:
5384                 break;
5385         }
5386 }
5387
5388 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5389                                                      int me, int pipe,
5390                                                      enum amdgpu_interrupt_state state)
5391 {
5392         u32 mec_int_cntl, mec_int_cntl_reg;
5393
5394         /*
5395          * amdgpu controls only the first MEC. That's why this function only
5396          * handles the setting of interrupts for this specific MEC. All other
5397          * pipes' interrupts are set by amdkfd.
5398          */
5399
5400         if (me == 1) {
5401                 switch (pipe) {
5402                 case 0:
5403                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5404                         break;
5405                 case 1:
5406                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5407                         break;
5408                 case 2:
5409                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5410                         break;
5411                 case 3:
5412                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5413                         break;
5414                 default:
5415                         DRM_DEBUG("invalid pipe %d\n", pipe);
5416                         return;
5417                 }
5418         } else {
5419                 DRM_DEBUG("invalid me %d\n", me);
5420                 return;
5421         }
5422
5423         switch (state) {
5424         case AMDGPU_IRQ_STATE_DISABLE:
5425                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5426                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5427                                              TIME_STAMP_INT_ENABLE, 0);
5428                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5429                 break;
5430         case AMDGPU_IRQ_STATE_ENABLE:
5431                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5432                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5433                                              TIME_STAMP_INT_ENABLE, 1);
5434                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5435                 break;
5436         default:
5437                 break;
5438         }
5439 }
5440
5441 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5442                                              struct amdgpu_irq_src *source,
5443                                              unsigned type,
5444                                              enum amdgpu_interrupt_state state)
5445 {
5446         switch (state) {
5447         case AMDGPU_IRQ_STATE_DISABLE:
5448         case AMDGPU_IRQ_STATE_ENABLE:
5449                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5450                                PRIV_REG_INT_ENABLE,
5451                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5452                 break;
5453         default:
5454                 break;
5455         }
5456
5457         return 0;
5458 }
5459
5460 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5461                                               struct amdgpu_irq_src *source,
5462                                               unsigned type,
5463                                               enum amdgpu_interrupt_state state)
5464 {
5465         switch (state) {
5466         case AMDGPU_IRQ_STATE_DISABLE:
5467         case AMDGPU_IRQ_STATE_ENABLE:
5468                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5469                                PRIV_INSTR_INT_ENABLE,
5470                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
5471         default:
5472                 break;
5473         }
5474
5475         return 0;
5476 }
5477
5478 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5479         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5480                         CP_ECC_ERROR_INT_ENABLE, 1)
5481
5482 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5483         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5484                         CP_ECC_ERROR_INT_ENABLE, 0)
5485
5486 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5487                                               struct amdgpu_irq_src *source,
5488                                               unsigned type,
5489                                               enum amdgpu_interrupt_state state)
5490 {
5491         switch (state) {
5492         case AMDGPU_IRQ_STATE_DISABLE:
5493                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5494                                 CP_ECC_ERROR_INT_ENABLE, 0);
5495                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5496                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5497                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5498                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5499                 break;
5500
5501         case AMDGPU_IRQ_STATE_ENABLE:
5502                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5503                                 CP_ECC_ERROR_INT_ENABLE, 1);
5504                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5505                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5506                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5507                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5508                 break;
5509         default:
5510                 break;
5511         }
5512
5513         return 0;
5514 }
5515
5516
5517 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5518                                             struct amdgpu_irq_src *src,
5519                                             unsigned type,
5520                                             enum amdgpu_interrupt_state state)
5521 {
5522         switch (type) {
5523         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5524                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5525                 break;
5526         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5527                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5528                 break;
5529         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5530                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5531                 break;
5532         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5533                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5534                 break;
5535         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5536                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5537                 break;
5538         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5539                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5540                 break;
5541         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5542                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5543                 break;
5544         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5545                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5546                 break;
5547         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5548                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5549                 break;
5550         default:
5551                 break;
5552         }
5553         return 0;
5554 }
5555
5556 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5557                             struct amdgpu_irq_src *source,
5558                             struct amdgpu_iv_entry *entry)
5559 {
5560         int i;
5561         u8 me_id, pipe_id, queue_id;
5562         struct amdgpu_ring *ring;
5563
5564         DRM_DEBUG("IH: CP EOP\n");
5565         me_id = (entry->ring_id & 0x0c) >> 2;
5566         pipe_id = (entry->ring_id & 0x03) >> 0;
5567         queue_id = (entry->ring_id & 0x70) >> 4;
5568
5569         switch (me_id) {
5570         case 0:
5571                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5572                 break;
5573         case 1:
5574         case 2:
5575                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5576                         ring = &adev->gfx.compute_ring[i];
5577                         /* Per-queue interrupt is supported for MEC starting from VI.
5578                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5579                          */
5580                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5581                                 amdgpu_fence_process(ring);
5582                 }
5583                 break;
5584         }
5585         return 0;
5586 }
5587
5588 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5589                            struct amdgpu_iv_entry *entry)
5590 {
5591         u8 me_id, pipe_id, queue_id;
5592         struct amdgpu_ring *ring;
5593         int i;
5594
5595         me_id = (entry->ring_id & 0x0c) >> 2;
5596         pipe_id = (entry->ring_id & 0x03) >> 0;
5597         queue_id = (entry->ring_id & 0x70) >> 4;
5598
5599         switch (me_id) {
5600         case 0:
5601                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5602                 break;
5603         case 1:
5604         case 2:
5605                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5606                         ring = &adev->gfx.compute_ring[i];
5607                         if (ring->me == me_id && ring->pipe == pipe_id &&
5608                             ring->queue == queue_id)
5609                                 drm_sched_fault(&ring->sched);
5610                 }
5611                 break;
5612         }
5613 }
5614
5615 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5616                                  struct amdgpu_irq_src *source,
5617                                  struct amdgpu_iv_entry *entry)
5618 {
5619         DRM_ERROR("Illegal register access in command stream\n");
5620         gfx_v9_0_fault(adev, entry);
5621         return 0;
5622 }
5623
5624 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5625                                   struct amdgpu_irq_src *source,
5626                                   struct amdgpu_iv_entry *entry)
5627 {
5628         DRM_ERROR("Illegal instruction in command stream\n");
5629         gfx_v9_0_fault(adev, entry);
5630         return 0;
5631 }
5632
5633
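/*
 * RAS/EDC counter description table: each entry names a memory block, the
 * register holding its error counts, and the SEC (correctable) and DED
 * (uncorrectable) count fields. Entries ending in 0, 0 only expose a
 * single-error-detect (SED) count.
 */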
5634 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5635         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5636           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5637           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5638         },
5639         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5640           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5641           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5642         },
5643         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5644           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5645           0, 0
5646         },
5647         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5648           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5649           0, 0
5650         },
5651         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5652           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5653           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5654         },
5655         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5656           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5657           0, 0
5658         },
5659         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5660           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5661           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5662         },
5663         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5664           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5665           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5666         },
5667         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5668           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5669           0, 0
5670         },
5671         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5672           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5673           0, 0
5674         },
5675         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5676           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5677           0, 0
5678         },
5679         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5680           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5681           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5682         },
5683         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5684           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5685           0, 0
5686         },
5687         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5688           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5689           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5690         },
5691         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5692           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5693           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5694           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5695         },
5696         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5697           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5698           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5699           0, 0
5700         },
5701         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5702           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5703           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5704           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5705         },
5706         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5707           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5708           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5709           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5710         },
5711         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5712           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5713           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5714           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5715         },
5716         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5717           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5718           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5719           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5720         },
5721         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5722           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5723           0, 0
5724         },
5725         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5726           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5727           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5728         },
5729         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5730           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5731           0, 0
5732         },
5733         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5734           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5735           0, 0
5736         },
5737         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5738           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5739           0, 0
5740         },
5741         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5742           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5743           0, 0
5744         },
5745         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5746           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5747           0, 0
5748         },
5749         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5750           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5751           0, 0
5752         },
5753         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5754           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5755           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5756         },
5757         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5758           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5759           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5760         },
5761         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5762           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5763           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5764         },
5765         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5766           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5767           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5768         },
5769         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5770           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5771           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5772         },
5773         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5774           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5775           0, 0
5776         },
5777         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5778           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5779           0, 0
5780         },
5781         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5782           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5783           0, 0
5784         },
5785         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5786           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5787           0, 0
5788         },
5789         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5790           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5791           0, 0
5792         },
5793         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5794           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5795           0, 0
5796         },
5797         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5798           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5799           0, 0
5800         },
5801         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5802           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5803           0, 0
5804         },
5805         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5806           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5807           0, 0
5808         },
5809         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5810           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5811           0, 0
5812         },
5813         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5814           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5815           0, 0
5816         },
5817         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5818           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5819           0, 0
5820         },
5821         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5822           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5823           0, 0
5824         },
5825         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5826           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5827           0, 0
5828         },
5829         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5830           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5831           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5832         },
5833         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5834           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5835           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5836         },
5837         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5838           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5839           0, 0
5840         },
5841         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5842           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5843           0, 0
5844         },
5845         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5846           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5847           0, 0
5848         },
5849         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5850           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5851           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5852         },
5853         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5854           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5855           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5856         },
5857         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5858           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5859           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
5860         },
5861         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5862           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5863           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
5864         },
5865         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5866           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
5867           0, 0
5868         },
5869         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5870           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5871           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
5872         },
5873         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5874           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5875           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
5876         },
5877         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5878           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
5879           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
5880         },
5881         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5882           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5883           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
5884         },
5885         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5886           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5887           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
5888         },
5889         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5890           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5891           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
5892         },
5893         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5894           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5895           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
5896         },
5897         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5898           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5899           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
5900         },
5901         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5902           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5903           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
5904         },
5905         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5906           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5907           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
5908         },
5909         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5910           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5911           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
5912         },
5913         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5914           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5915           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
5916         },
5917         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5918           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5919           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
5920         },
5921         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5922           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5923           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
5924         },
5925         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5926           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5927           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
5928         },
5929         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5930           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5931           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
5932         },
5933         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5934           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5935           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
5936         },
5937         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5938           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5939           0, 0
5940         },
5941         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5942           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
5943           0, 0
5944         },
5945         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5946           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
5947           0, 0
5948         },
5949         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5950           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
5951           0, 0
5952         },
5953         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5954           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
5955           0, 0
5956         },
5957         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5958           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5959           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
5960         },
5961         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5962           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5963           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
5964         },
5965         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5966           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5967           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
5968         },
5969         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5970           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5971           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
5972         },
5973         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5974           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5975           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
5976         },
5977         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5978           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5979           0, 0
5980         },
5981         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5982           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
5983           0, 0
5984         },
5985         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5986           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
5987           0, 0
5988         },
5989         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5990           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
5991           0, 0
5992         },
5993         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5994           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
5995           0, 0
5996         },
5997         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5998           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5999           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6000         },
6001         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6002           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6003           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6004         },
6005         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6006           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6007           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6008         },
6009         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6010           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6011           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6012         },
6013         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6014           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6015           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6016         },
6017         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6018           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6019           0, 0
6020         },
6021         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6022           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6023           0, 0
6024         },
6025         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6026           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6027           0, 0
6028         },
6029         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6030           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6031           0, 0
6032         },
6033         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6034           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6035           0, 0
6036         },
6037         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6038           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6039           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6040         },
6041         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6042           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6043           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6044         },
6045         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6046           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6047           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6048         },
6049         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6050           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6051           0, 0
6052         },
6053         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6054           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6055           0, 0
6056         },
6057         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6058           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6059           0, 0
6060         },
6061         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6062           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6063           0, 0
6064         },
6065         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6066           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6067           0, 0
6068         },
6069         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6070           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6071           0, 0
6072         }
6073 };
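/*
 * Each gfx_v9_0_ras_fields[] entry above ties a named GFX sub-block to the
 * EDC counter register that holds its error counts and to the mask/shift of
 * its SEC (correctable) and DED (uncorrectable) count fields.  Entries whose
 * last two initializers are "0, 0" have no DED field; their single SED
 * (single-error-detect) count is decoded through the SEC mask/shift slot.
 */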
6074
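/*
 * Error injection path: the function below validates the requested sub-block
 * against ras_gfx_subblocks (index bounds, non-NULL name, and both the
 * hardware- and driver-supported error-type masks), translates the request
 * into a ta_ras_trigger_error_input, and hands it to the PSP RAS TA under
 * grbm_idx_mutex.  A minimal caller sketch, assuming the ras_inject_if has
 * already been filled in by the RAS core and that sub_block_index indexes
 * ras_gfx_subblocks (values below are illustrative only):
 *
 *	struct ras_inject_if info = {
 *		.head.block = AMDGPU_RAS_BLOCK__GFX,
 *		.head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
 *		.head.sub_block_index = 0,
 *		.address = 0,
 *		.value = 0,
 *	};
 *	ret = gfx_v9_0_ras_error_inject(adev, &info);
 */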
6075 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6076                                      void *inject_if)
6077 {
6078         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6079         int ret;
6080         struct ta_ras_trigger_error_input block_info = { 0 };
6081
6082         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6083                 return -EINVAL;
6084
6085         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6086                 return -EINVAL;
6087
6088         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6089                 return -EPERM;
6090
6091         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6092               info->head.type)) {
6093                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6094                         ras_gfx_subblocks[info->head.sub_block_index].name,
6095                         info->head.type);
6096                 return -EPERM;
6097         }
6098
6099         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6100               info->head.type)) {
6101                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6102                         ras_gfx_subblocks[info->head.sub_block_index].name,
6103                         info->head.type);
6104                 return -EPERM;
6105         }
6106
6107         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6108         block_info.sub_block_index =
6109                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6110         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6111         block_info.address = info->address;
6112         block_info.value = info->value;
6113
6114         mutex_lock(&adev->grbm_idx_mutex);
6115         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6116         mutex_unlock(&adev->grbm_idx_mutex);
6117
6118         return ret;
6119 }
6120
6121 static const char *vml2_mems[] = {
6122         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6123         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6124         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6125         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6126         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6127         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6128         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6129         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6130         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6131         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6132         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6133         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6134         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6135         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6136         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6137         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6138 };
6139
6140 static const char *vml2_walker_mems[] = {
6141         "UTC_VML2_CACHE_PDE0_MEM0",
6142         "UTC_VML2_CACHE_PDE0_MEM1",
6143         "UTC_VML2_CACHE_PDE1_MEM0",
6144         "UTC_VML2_CACHE_PDE1_MEM1",
6145         "UTC_VML2_CACHE_PDE2_MEM0",
6146         "UTC_VML2_CACHE_PDE2_MEM1",
6147         "UTC_VML2_RDIF_LOG_FIFO",
6148 };
6149
6150 static const char *atc_l2_cache_2m_mems[] = {
6151         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6152         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6153         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6154         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6155 };
6156
6157 static const char *atc_l2_cache_4k_mems[] = {
6158         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6159         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6160         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6161         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6162         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6163         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6164         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6165         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6166         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6167         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6168         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6169         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6170         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6171         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6172         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6173         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6174         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6175         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6176         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6177         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6178         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6179         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6180         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6181         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6182         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6183         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6184         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6185         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6186         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6187         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6188         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6189         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6190 };
6191
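/*
 * The UTC (VML2 / ATC L2) EDC counters are banked behind INDEX/CNT register
 * pairs: writing N to the *_INDEX register selects memory instance N (named
 * in the arrays above) and the following *_CNT read returns that instance's
 * counts.  Writing 255 to INDEX and 0 to CNT before and after the scan
 * appears to park the selection and clear the counters.  Per bank, the loops
 * below boil down to (sketch of existing behaviour, not new code):
 *
 *	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
 *	data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
 *	sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
 */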
6192 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6193                                          struct ras_err_data *err_data)
6194 {
6195         uint32_t i, data;
6196         uint32_t sec_count, ded_count;
6197
6198         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6199         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6200         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6201         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6202         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6203         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6204         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6205         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6206
6207         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6208                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6209                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6210
6211                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6212                 if (sec_count) {
6213                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6214                                  vml2_mems[i], sec_count);
6215                         err_data->ce_count += sec_count;
6216                 }
6217
6218                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6219                 if (ded_count) {
6220                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6221                                  vml2_mems[i], ded_count);
6222                         err_data->ue_count += ded_count;
6223                 }
6224         }
6225
6226         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6227                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6228                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6229
6230                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6231                                                 SEC_COUNT);
6232                 if (sec_count) {
6233                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6234                                  vml2_walker_mems[i], sec_count);
6235                         err_data->ce_count += sec_count;
6236                 }
6237
6238                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6239                                                 DED_COUNT);
6240                 if (ded_count) {
6241                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6242                                  vml2_walker_mems[i], ded_count);
6243                         err_data->ue_count += ded_count;
6244                 }
6245         }
6246
6247         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6248                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6249                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6250
6251                 sec_count = (data & 0x00006000L) >> 0xd; /* SEC count: bits [14:13] */
6252                 if (sec_count) {
6253                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6254                                  atc_l2_cache_2m_mems[i], sec_count);
6255                         err_data->ce_count += sec_count;
6256                 }
6257         }
6258
6259         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6260                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6261                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6262
6263                 sec_count = (data & 0x00006000L) >> 0xd; /* SEC count: bits [14:13] */
6264                 if (sec_count) {
6265                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6266                                  atc_l2_cache_4k_mems[i], sec_count);
6267                         err_data->ce_count += sec_count;
6268                 }
6269
6270                 ded_count = (data & 0x00018000L) >> 0xf; /* DED count: bits [16:15] */
6271                 if (ded_count) {
6272                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6273                                  atc_l2_cache_4k_mems[i], ded_count);
6274                         err_data->ue_count += ded_count;
6275                 }
6276         }
6277
6278         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6279         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6280         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6281         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6282
6283         return 0;
6284 }
6285
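/*
 * gfx_v9_0_ras_error_count() decodes one raw EDC counter value: it walks
 * gfx_v9_0_ras_fields, keeps only entries that describe the register that
 * was just read (same offset/segment/instance), extracts the masked and
 * shifted SEC/DED counts, logs each non-zero sub-block, and adds the counts
 * to the caller's running totals.
 */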
6286 static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg,
6287         uint32_t se_id, uint32_t inst_id, uint32_t value,
6288         uint32_t *sec_count, uint32_t *ded_count)
6289 {
6290         uint32_t i;
6291         uint32_t sec_cnt, ded_cnt;
6292
6293         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6294                 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6295                         gfx_v9_0_ras_fields[i].seg != reg->seg ||
6296                         gfx_v9_0_ras_fields[i].inst != reg->inst)
6297                         continue;
6298
6299                 sec_cnt = (value &
6300                                 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6301                                 gfx_v9_0_ras_fields[i].sec_count_shift;
6302                 if (sec_cnt) {
6303                         DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
6304                                 gfx_v9_0_ras_fields[i].name,
6305                                 se_id, inst_id,
6306                                 sec_cnt);
6307                         *sec_count += sec_cnt;
6308                 }
6309
6310                 ded_cnt = (value &
6311                                 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6312                                 gfx_v9_0_ras_fields[i].ded_count_shift;
6313                 if (ded_cnt) {
6314                         DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
6315                                 gfx_v9_0_ras_fields[i].name,
6316                                 se_id, inst_id,
6317                                 ded_cnt);
6318                         *ded_count += ded_cnt;
6319                 }
6320         }
6321
6322         return 0;
6323 }
6324
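/*
 * Clearing the EDC counters: the counters are cleared by reading them back,
 * so the function below selects every SE/instance combination via GRBM and
 * reads each counter register, discarding the value, then walks the banked
 * UTC counters the same way.  The final INDEX=255 writes appear to park the
 * index registers again.
 */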
6325 static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev)
6326 {
6327         int i, j, k;
6328
6329         /* read back registers to clear the counters */
6330         mutex_lock(&adev->grbm_idx_mutex);
6331         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6332                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6333                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6334                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6335                                 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6336                         }
6337                 }
6338         }
6339         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6340         mutex_unlock(&adev->grbm_idx_mutex);
6341
6342         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6343         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6344         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6345         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6346         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6347         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6348         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6349         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6350
6351         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6352                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6353                 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6354         }
6355
6356         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6357                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6358                 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6359         }
6360
6361         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6362                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6363                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6364         }
6365
6366         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6367                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6368                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6369         }
6370
6371         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6372         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6373         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6374         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6375 }
6376
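/*
 * Top-level RAS count query: under grbm_idx_mutex, select each shader engine
 * and instance of every register in gfx_v9_0_edc_counter_regs, read the raw
 * counter and let gfx_v9_0_ras_error_count() fold it into the SEC/DED
 * totals; the UTC/ATC L2 banks are then appended by
 * gfx_v9_0_query_utc_edc_status().  ce_count accumulates correctable (SEC)
 * errors, ue_count uncorrectable (DED) errors.
 */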
6377 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6378                                           void *ras_error_status)
6379 {
6380         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6381         uint32_t sec_count = 0, ded_count = 0;
6382         uint32_t i, j, k;
6383         uint32_t reg_value;
6384
6385         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6386                 return -EINVAL;
6387
6388         err_data->ue_count = 0;
6389         err_data->ce_count = 0;
6390
6391         mutex_lock(&adev->grbm_idx_mutex);
6392
6393         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6394                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6395                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6396                                 gfx_v9_0_select_se_sh(adev, j, 0, k);
6397                                 reg_value =
6398                                         RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6399                                 if (reg_value)
6400                                         gfx_v9_0_ras_error_count(&gfx_v9_0_edc_counter_regs[i],
6401                                                         j, k, reg_value,
6402                                                         &sec_count, &ded_count);
6403                         }
6404                 }
6405         }
6406
6407         err_data->ce_count += sec_count;
6408         err_data->ue_count += ded_count;
6409
6410         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6411         mutex_unlock(&adev->grbm_idx_mutex);
6412
6413         gfx_v9_0_query_utc_edc_status(adev, err_data);
6414
6415         return 0;
6416 }
6417
6418 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6419         .name = "gfx_v9_0",
6420         .early_init = gfx_v9_0_early_init,
6421         .late_init = gfx_v9_0_late_init,
6422         .sw_init = gfx_v9_0_sw_init,
6423         .sw_fini = gfx_v9_0_sw_fini,
6424         .hw_init = gfx_v9_0_hw_init,
6425         .hw_fini = gfx_v9_0_hw_fini,
6426         .suspend = gfx_v9_0_suspend,
6427         .resume = gfx_v9_0_resume,
6428         .is_idle = gfx_v9_0_is_idle,
6429         .wait_for_idle = gfx_v9_0_wait_for_idle,
6430         .soft_reset = gfx_v9_0_soft_reset,
6431         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6432         .set_powergating_state = gfx_v9_0_set_powergating_state,
6433         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6434 };
6435
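/*
 * Ring function tables for the GFX, compute and KIQ rings follow.  The
 * emit_frame_size and emit_ib_size values are worst-case sizes in dwords
 * used when reserving ring space, so every packet a frame may emit has to
 * be accounted for in the sums below.
 */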
6436 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6437         .type = AMDGPU_RING_TYPE_GFX,
6438         .align_mask = 0xff,
6439         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6440         .support_64bit_ptrs = true,
6441         .vmhub = AMDGPU_GFXHUB_0,
6442         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6443         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6444         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6445         .emit_frame_size = /* 242 dwords maximum in total with 16 IBs */
6446                 5 +  /* COND_EXEC */
6447                 7 +  /* PIPELINE_SYNC */
6448                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6449                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6450                 2 + /* VM_FLUSH */
6451                 8 +  /* FENCE for VM_FLUSH */
6452                 20 + /* GDS switch */
6453                 4 + /* double SWITCH_BUFFER,
6454                        the first COND_EXEC jumps to the place just
6455                        prior to this double SWITCH_BUFFER */
6456                 5 + /* COND_EXEC */
6457                 7 + /* HDP_flush */
6458                 4 + /* VGT_flush */
6459                 14 + /* CE_META */
6460                 31 + /* DE_META */
6461                 3 + /* CNTX_CTRL */
6462                 5 + /* HDP_INVL */
6463                 8 + 8 + /* FENCE x2 */
6464                 2, /* SWITCH_BUFFER */
6465         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6466         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6467         .emit_fence = gfx_v9_0_ring_emit_fence,
6468         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6469         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6470         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6471         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6472         .test_ring = gfx_v9_0_ring_test_ring,
6473         .test_ib = gfx_v9_0_ring_test_ib,
6474         .insert_nop = amdgpu_ring_insert_nop,
6475         .pad_ib = amdgpu_ring_generic_pad_ib,
6476         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6477         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6478         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6479         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6480         .emit_tmz = gfx_v9_0_ring_emit_tmz,
6481         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6482         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6483         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6484         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6485 };
6486
6487 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6488         .type = AMDGPU_RING_TYPE_COMPUTE,
6489         .align_mask = 0xff,
6490         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6491         .support_64bit_ptrs = true,
6492         .vmhub = AMDGPU_GFXHUB_0,
6493         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6494         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6495         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6496         .emit_frame_size =
6497                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6498                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6499                 5 + /* hdp invalidate */
6500                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6501                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6502                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6503                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6504                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6505         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6506         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6507         .emit_fence = gfx_v9_0_ring_emit_fence,
6508         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6509         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6510         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6511         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6512         .test_ring = gfx_v9_0_ring_test_ring,
6513         .test_ib = gfx_v9_0_ring_test_ib,
6514         .insert_nop = amdgpu_ring_insert_nop,
6515         .pad_ib = amdgpu_ring_generic_pad_ib,
6516         .set_priority = gfx_v9_0_ring_set_priority_compute,
6517         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6518         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6519         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6520 };
6521
6522 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6523         .type = AMDGPU_RING_TYPE_KIQ,
6524         .align_mask = 0xff,
6525         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6526         .support_64bit_ptrs = true,
6527         .vmhub = AMDGPU_GFXHUB_0,
6528         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6529         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6530         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6531         .emit_frame_size =
6532                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6533                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6534                 5 + /* hdp invalidate */
6535                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6536                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6537                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6538                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6539                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6540         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6541         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6542         .test_ring = gfx_v9_0_ring_test_ring,
6543         .insert_nop = amdgpu_ring_insert_nop,
6544         .pad_ib = amdgpu_ring_generic_pad_ib,
6545         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6546         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6547         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6548         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6549 };
6550
6551 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6552 {
6553         int i;
6554
6555         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6556
6557         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6558                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6559
6560         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6561                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6562 }
6563
6564 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6565         .set = gfx_v9_0_set_eop_interrupt_state,
6566         .process = gfx_v9_0_eop_irq,
6567 };
6568
6569 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6570         .set = gfx_v9_0_set_priv_reg_fault_state,
6571         .process = gfx_v9_0_priv_reg_irq,
6572 };
6573
6574 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6575         .set = gfx_v9_0_set_priv_inst_fault_state,
6576         .process = gfx_v9_0_priv_inst_irq,
6577 };
6578
6579 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6580         .set = gfx_v9_0_set_cp_ecc_error_state,
6581         .process = amdgpu_gfx_cp_ecc_error_irq,
6582 };
6583
6584
6585 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6586 {
6587         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6588         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6589
6590         adev->gfx.priv_reg_irq.num_types = 1;
6591         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6592
6593         adev->gfx.priv_inst_irq.num_types = 1;
6594         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6595
6596         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6597         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6598 }
6599
6600 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6601 {
6602         switch (adev->asic_type) {
6603         case CHIP_VEGA10:
6604         case CHIP_VEGA12:
6605         case CHIP_VEGA20:
6606         case CHIP_RAVEN:
6607         case CHIP_ARCTURUS:
6608         case CHIP_RENOIR:
6609                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6610                 break;
6611         default:
6612                 break;
6613         }
6614 }
6615
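/*
 * GDS defaults per ASIC: gds_size is the global data share size (0x10000 =
 * 64KB on Vega), gds_compute_max_wave_id is the highest compute wave ID
 * allowed to allocate GDS, and the GWS/OA resource counts are fixed at
 * 64 and 16 for all gfx9 parts handled here.
 */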
6616 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6617 {
6618         /* init asic gds info */
6619         switch (adev->asic_type) {
6620         case CHIP_VEGA10:
6621         case CHIP_VEGA12:
6622         case CHIP_VEGA20:
6623                 adev->gds.gds_size = 0x10000;
6624                 break;
6625         case CHIP_RAVEN:
6626         case CHIP_ARCTURUS:
6627                 adev->gds.gds_size = 0x1000;
6628                 break;
6629         default:
6630                 adev->gds.gds_size = 0x10000;
6631                 break;
6632         }
6633
6634         switch (adev->asic_type) {
6635         case CHIP_VEGA10:
6636         case CHIP_VEGA20:
6637                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6638                 break;
6639         case CHIP_VEGA12:
6640                 adev->gds.gds_compute_max_wave_id = 0x27f;
6641                 break;
6642         case CHIP_RAVEN:
6643                 if (adev->rev_id >= 0x8)
6644                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6645                 else
6646                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6647                 break;
6648         case CHIP_ARCTURUS:
6649                 adev->gds.gds_compute_max_wave_id = 0xfff;
6650                 break;
6651         default:
6652                 /* this really depends on the chip */
6653                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6654                 break;
6655         }
6656
6657         adev->gds.gws_size = 64;
6658         adev->gds.oa_size = 16;
6659 }
6660
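/*
 * Programming the user CU-inactive bitmap: bit N set in @bitmap marks CU N
 * of the currently selected SE/SH as disabled.  As a sketch only, passing
 * 0x3 would program GC_USER_SHADER_ARRAY_CONFIG so that CU0 and CU1 of the
 * selected shader array are treated as inactive.
 */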
6661 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6662                                                  u32 bitmap)
6663 {
6664         u32 data;
6665
6666         if (!bitmap)
6667                 return;
6668
6669         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6670         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6671
6672         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6673 }
6674
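/*
 * The active-CU bitmap is derived by OR-ing the fuse-level
 * (CC_GC_SHADER_ARRAY_CONFIG) and user (GC_USER_SHADER_ARRAY_CONFIG)
 * inactive masks, inverting the result and clipping it to max_cu_per_sh
 * bits, i.e. active = ~(cc_inactive | user_inactive) & mask.
 */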
6675 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6676 {
6677         u32 data, mask;
6678
6679         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6680         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6681
6682         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6683         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6684
6685         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6686
6687         return (~data) & mask;
6688 }
6689
6690 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6691                                  struct amdgpu_cu_info *cu_info)
6692 {
6693         int i, j, k, counter, active_cu_number = 0;
6694         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6695         unsigned disable_masks[4 * 4];
6696
6697         if (!adev || !cu_info)
6698                 return -EINVAL;
6699
6700         /*
6701          * The bitmap array is 4*4 = 16 entries, which covers all gfx9 ASICs.
6702          */
6703         if (adev->gfx.config.max_shader_engines *
6704                 adev->gfx.config.max_sh_per_se > 16)
6705                 return -EINVAL;
6706
6707         amdgpu_gfx_parse_disable_cu(disable_masks,
6708                                     adev->gfx.config.max_shader_engines,
6709                                     adev->gfx.config.max_sh_per_se);
6710
6711         mutex_lock(&adev->grbm_idx_mutex);
6712         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6713                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6714                         mask = 1;
6715                         ao_bitmap = 0;
6716                         counter = 0;
6717                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6718                         gfx_v9_0_set_user_cu_inactive_bitmap(
6719                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6720                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6721
6722                         /*
6723                          * The bitmap (and ao_cu_bitmap) in the cu_info
6724                          * structure is a 4x4 array, which suits Vega ASICs
6725                          * with their 4*2 SE/SH layout.
6726                          * Arcturus changes the SE/SH layout to 8*1, so to
6727                          * minimize the impact we fold it into the existing
6728                          * bitmap array as below:
6729                          *    SE4,SH0 --> bitmap[0][1]
6730                          *    SE5,SH0 --> bitmap[1][1]
6731                          *    SE6,SH0 --> bitmap[2][1]
6732                          *    SE7,SH0 --> bitmap[3][1]
6733                          */
6734                         cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6735
6736                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6737                                 if (bitmap & mask) {
6738                                         if (counter < adev->gfx.config.max_cu_per_sh)
6739                                                 ao_bitmap |= mask;
6740                                         counter++;
6741                                 }
6742                                 mask <<= 1;
6743                         }
6744                         active_cu_number += counter;
6745                         if (i < 2 && j < 2)
6746                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6747                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6748                 }
6749         }
6750         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6751         mutex_unlock(&adev->grbm_idx_mutex);
6752
6753         cu_info->number = active_cu_number;
6754         cu_info->ao_cu_mask = ao_cu_mask;
6755         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6756
6757         return 0;
6758 }
6759
6760 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6761 {
6762         .type = AMD_IP_BLOCK_TYPE_GFX,
6763         .major = 9,
6764         .minor = 0,
6765         .rev = 0,
6766         .funcs = &gfx_v9_0_ip_funcs,
6767 };