/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "gfx_v9_4.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

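/*
 * Register offsets and field masks used in this file that are not taken
 * from the generated gc_9_0 headers included above, so they are defined
 * locally here.
 */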
#define mmPWR_MISC_CNTL_STATUS                                  0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

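/*
 * Firmware images consumed by this IP block, one set per supported ASIC.
 * MODULE_FIRMWARE() records the file names in the module info so that
 * firmware packaging tools (e.g. initramfs generators) can pick them up.
 */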
MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

#define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0

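/*
 * GFX sub-block indices used when talking to the RAS TA.  The
 * ras_gfx_subblocks[] table further down maps the driver's RAS sub-block
 * enumeration onto these values, e.g. for error injection.
 */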
enum ta_ras_gfx_subblock {
        /*CPC*/
        TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
        TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
        TA_RAS_BLOCK__GFX_CPC_UCODE,
        TA_RAS_BLOCK__GFX_DC_STATE_ME1,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
        TA_RAS_BLOCK__GFX_DC_STATE_ME2,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        /* CPF*/
        TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
        TA_RAS_BLOCK__GFX_CPF_TAG,
        TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
        /* CPG*/
        TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
        TA_RAS_BLOCK__GFX_CPG_TAG,
        TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
        /* GDS*/
        TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        /* SPI*/
        TA_RAS_BLOCK__GFX_SPI_SR_MEM,
        /* SQ*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_LDS_D,
        TA_RAS_BLOCK__GFX_SQ_LDS_I,
        TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
        /* SQC (3 ranges)*/
        TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        /* SQC range 0*/
        TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
                TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        /* SQC range 1*/
        TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        /* SQC range 2*/
        TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
        /* TA*/
        TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
        TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        /* TCA*/
        TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        /* TCC (5 sub-ranges)*/
        TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        /* TCC range 0*/
        TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
        TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        /* TCC range 1*/
        TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
                TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        /* TCC range 2*/
        TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
        TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
        TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
                TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        /* TCC range 3*/
        TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
                TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        /* TCC range 4*/
        TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
                TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
                TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
        /* TCI*/
        TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
        /* TCP*/
        TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
        TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
        TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
        TA_RAS_BLOCK__GFX_TCP_DB_RAM,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        /* TD*/
        TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
        TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        /* EA (3 sub-ranges)*/
        TA_RAS_BLOCK__GFX_EA_INDEX_START,
        /* EA range 0*/
        TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        /* EA range 1*/
        TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        /* EA range 2*/
        TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
        /* UTC VM L2 bank*/
        TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
        /* UTC VM walker*/
        TA_RAS_BLOCK__UTC_VML2_WALKER,
        /* UTC ATC L2 2MB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
        /* UTC ATC L2 4KB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
        TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
        unsigned char *name;
        int ta_subblock;
        int hw_supported_error_type;
        int sw_supported_error_type;
};

#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
        [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
                #subblock,                                                     \
                TA_RAS_BLOCK__##subblock,                                      \
                ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
                (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
        }

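/*
 * Each entry below packs the eight flags passed to AMDGPU_RAS_SUB_BLOCK()
 * into two bitfields: a..d land in bits 0..3 of hw_supported_error_type,
 * while e, f, g and h land in bits 1, 3, 0 and 2 of
 * sw_supported_error_type respectively (see the macro above).
 */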
static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

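/*
 * "Golden" register settings: masked register writes applied during init
 * by gfx_v9_0_init_golden_registers() to bring each ASIC variant to its
 * recommended default configuration.
 */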
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
};

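/*
 * Offsets of the RLC SRM INDEX_CNTL address/data register pairs relative
 * to the first instance, so the save/restore list registers can be
 * programmed in a simple loop.
 */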
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
                                          void *ras_error_status);
static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
                                     void *inject_if);

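/*
 * KIQ (kernel interface queue) PM4 packet builders.  The helpers below
 * emit SET_RESOURCES, MAP_QUEUES, UNMAP_QUEUES, QUERY_STATUS and
 * INVALIDATE_TLBS packets on the KIQ ring.
 */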
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
                                uint64_t queue_mask)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring,
                PACKET3_SET_RESOURCES_VMID_MASK(0) |
                /* vmid_mask:0* queue_type:0 (KIQ) */
                PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
        amdgpu_ring_write(kiq_ring,
                        lower_32_bits(queue_mask));     /* queue mask lo */
        amdgpu_ring_write(kiq_ring,
                        upper_32_bits(queue_mask));     /* queue mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
        amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}

static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
                                 struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = kiq_ring->adev;
        uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
        uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
        /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
        amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
                         PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
                         PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
                         PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                         PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                         PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
                         /*queue_type: normal compute queue */
                         PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
                         /* alloc format: all_on_one_pipe */
                         PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
                         PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
                         /* num_queues: must be 1 */
                         PACKET3_MAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
        amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   enum amdgpu_unmap_queues_action action,
                                   u64 gpu_addr, u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
        amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
                          PACKET3_UNMAP_QUEUES_ACTION(action) |
                          PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
                          PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
                          PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

        if (action == PREEMPT_QUEUES_NO_UNMAP) {
                amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, seq);
        } else {
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
        }
}

static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   u64 addr,
                                   u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
        amdgpu_ring_write(kiq_ring,
                          PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
                          PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
                          PACKET3_QUERY_STATUS_COMMAND(2));
        /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
        amdgpu_ring_write(kiq_ring,
                        PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
                        PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
        amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
        amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
                                uint16_t pasid, uint32_t flush_type,
                                bool all_hub)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
                        PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
                        PACKET3_INVALIDATE_TLBS_PASID(pasid) |
                        PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

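/*
 * The *_size fields give the length in dwords of each packet emitted by
 * the helpers above (PACKET3 header included), so callers can reserve
 * ring space before building them.
 */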
static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
        .kiq_set_resources = gfx_v9_0_kiq_set_resources,
        .kiq_map_queues = gfx_v9_0_kiq_map_queues,
        .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
        .kiq_query_status = gfx_v9_0_kiq_query_status,
        .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
        .set_resources_size = 8,
        .map_queues_size = 7,
        .unmap_queues_size = 6,
        .query_status_size = 7,
        .invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
        adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
}

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_VEGA10:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg10,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg10));
                break;
        case CHIP_VEGA12:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1_vg12,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
                break;
        case CHIP_VEGA20:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg20,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg20));
                break;
        case CHIP_ARCTURUS:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_4_1_arct,
                                                ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
                break;
        case CHIP_RAVEN:
                soc15_program_register_sequence(adev, golden_settings_gc_9_1,
                                                ARRAY_SIZE(golden_settings_gc_9_1));
                if (adev->rev_id >= 8)
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv2,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv2));
                else
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv1,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv1));
                break;
        case CHIP_RENOIR:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_1_rn,
                                                ARRAY_SIZE(golden_settings_gc_9_1_rn));
                return; /* for renoir, don't need common goldensetting */
        default:
                break;
        }

        if (adev->asic_type != CHIP_ARCTURUS)
                soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
                                                (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
        adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

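/*
 * Emit a WRITE_DATA packet that writes @val to the register at offset
 * @reg; @wc requests a write confirm before the packet completes.
 */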
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
                                       bool wc, uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
                                WRITE_DATA_DST_SEL(0) |
                                (wc ? WR_CONFIRM : 0));
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, val);
}

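/*
 * Emit a WAIT_REG_MEM packet: poll a register (mem_space == 0) or a memory
 * location (mem_space == 1) until (value & mask) == ref, with @inv as the
 * poll interval.  Memory addresses must be dword aligned.
 */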
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
                                  int mem_space, int opt, uint32_t addr0,
                                  uint32_t addr1, uint32_t ref, uint32_t mask,
                                  uint32_t inv)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring,
                                 /* memory (1) or register (0) */
                                 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
                                 WAIT_REG_MEM_OPERATION(opt) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
                                 WAIT_REG_MEM_ENGINE(eng_sel)));

        if (mem_space)
                BUG_ON(addr0 & 0x3); /* Dword align */
        amdgpu_ring_write(ring, addr0);
        amdgpu_ring_write(ring, addr1);
        amdgpu_ring_write(ring, ref);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, inv); /* poll interval */
}

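/*
 * Basic ring test: write 0xCAFEDEAD to a scratch register, ask the ring to
 * overwrite it with 0xDEADBEEF via SET_UCONFIG_REG, then poll until the
 * new value shows up or the timeout expires.
 */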
972 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
973 {
974         struct amdgpu_device *adev = ring->adev;
975         uint32_t scratch;
976         uint32_t tmp = 0;
977         unsigned i;
978         int r;
979
980         r = amdgpu_gfx_scratch_get(adev, &scratch);
981         if (r)
982                 return r;
983
984         WREG32(scratch, 0xCAFEDEAD);
985         r = amdgpu_ring_alloc(ring, 3);
986         if (r)
987                 goto error_free_scratch;
988
989         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
990         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
991         amdgpu_ring_write(ring, 0xDEADBEEF);
992         amdgpu_ring_commit(ring);
993
994         for (i = 0; i < adev->usec_timeout; i++) {
995                 tmp = RREG32(scratch);
996                 if (tmp == 0xDEADBEEF)
997                         break;
998                 udelay(1);
999         }
1000
1001         if (i >= adev->usec_timeout)
1002                 r = -ETIMEDOUT;
1003
1004 error_free_scratch:
1005         amdgpu_gfx_scratch_free(adev, scratch);
1006         return r;
1007 }
1008
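/* IB test: submit an indirect buffer that writes 0xDEADBEEF to a writeback
 * slot and verify the value once the fence signals.
 */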
1009 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1010 {
1011         struct amdgpu_device *adev = ring->adev;
1012         struct amdgpu_ib ib;
1013         struct dma_fence *f = NULL;
1014
1015         unsigned index;
1016         uint64_t gpu_addr;
1017         uint32_t tmp;
1018         long r;
1019
1020         r = amdgpu_device_wb_get(adev, &index);
1021         if (r)
1022                 return r;
1023
1024         gpu_addr = adev->wb.gpu_addr + (index * 4);
1025         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1026         memset(&ib, 0, sizeof(ib));
1027         r = amdgpu_ib_get(adev, NULL, 20, &ib); /* room for the 5-dword IB below */
1028         if (r)
1029                 goto err1;
1030
1031         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1032         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1033         ib.ptr[2] = lower_32_bits(gpu_addr);
1034         ib.ptr[3] = upper_32_bits(gpu_addr);
1035         ib.ptr[4] = 0xDEADBEEF;
1036         ib.length_dw = 5;
1037
1038         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1039         if (r)
1040                 goto err2;
1041
1042         r = dma_fence_wait_timeout(f, false, timeout);
1043         if (r == 0) {
1044                 r = -ETIMEDOUT;
1045                 goto err2;
1046         } else if (r < 0) {
1047                 goto err2;
1048         }
1049
1050         tmp = adev->wb.wb[index];
1051         if (tmp == 0xDEADBEEF)
1052                 r = 0;
1053         else
1054                 r = -EINVAL;
1055
1056 err2:
1057         amdgpu_ib_free(adev, &ib, NULL);
1058         dma_fence_put(f);
1059 err1:
1060         amdgpu_device_wb_free(adev, index);
1061         return r;
1062 }
1063
1065 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1066 {
1067         release_firmware(adev->gfx.pfp_fw);
1068         adev->gfx.pfp_fw = NULL;
1069         release_firmware(adev->gfx.me_fw);
1070         adev->gfx.me_fw = NULL;
1071         release_firmware(adev->gfx.ce_fw);
1072         adev->gfx.ce_fw = NULL;
1073         release_firmware(adev->gfx.rlc_fw);
1074         adev->gfx.rlc_fw = NULL;
1075         release_firmware(adev->gfx.mec_fw);
1076         adev->gfx.mec_fw = NULL;
1077         release_firmware(adev->gfx.mec2_fw);
1078         adev->gfx.mec2_fw = NULL;
1079
1080         kfree(adev->gfx.rlc.register_list_format);
1081 }
1082
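/* Parse the v2.1 RLC firmware header: record versions, sizes and pointers
 * for the save/restore list CNTL, GPM and SRM blobs.
 */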
1083 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1084 {
1085         const struct rlc_firmware_header_v2_1 *rlc_hdr;
1086
1087         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1088         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1089         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1090         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1091         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1092         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1093         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1094         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1095         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1096         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1097         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1098         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1099         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1100         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1101                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1102 }
1103
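/* Based on CP firmware versions, decide whether the ME and MEC firmware
 * support firmware-assisted register write-then-wait (*_fw_write_wait),
 * and warn once if the CP firmware is too old.
 */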
1104 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1105 {
1106         adev->gfx.me_fw_write_wait = false;
1107         adev->gfx.mec_fw_write_wait = false;
1108
1109         if ((adev->gfx.mec_fw_version < 0x000001a5) ||
1110             (adev->gfx.mec_feature_version < 46) ||
1111             (adev->gfx.pfp_fw_version < 0x000000b7) ||
1112             (adev->gfx.pfp_feature_version < 46))
1113                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1114
1115         switch (adev->asic_type) {
1116         case CHIP_VEGA10:
1117                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1118                     (adev->gfx.me_feature_version >= 42) &&
1119                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1120                     (adev->gfx.pfp_feature_version >= 42))
1121                         adev->gfx.me_fw_write_wait = true;
1122
1123                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1124                     (adev->gfx.mec_feature_version >= 42))
1125                         adev->gfx.mec_fw_write_wait = true;
1126                 break;
1127         case CHIP_VEGA12:
1128                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1129                     (adev->gfx.me_feature_version >= 44) &&
1130                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1131                     (adev->gfx.pfp_feature_version >= 44))
1132                         adev->gfx.me_fw_write_wait = true;
1133
1134                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1135                     (adev->gfx.mec_feature_version >= 44))
1136                         adev->gfx.mec_fw_write_wait = true;
1137                 break;
1138         case CHIP_VEGA20:
1139                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1140                     (adev->gfx.me_feature_version >= 44) &&
1141                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1142                     (adev->gfx.pfp_feature_version >= 44))
1143                         adev->gfx.me_fw_write_wait = true;
1144
1145                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1146                     (adev->gfx.mec_feature_version >= 44))
1147                         adev->gfx.mec_fw_write_wait = true;
1148                 break;
1149         case CHIP_RAVEN:
1150                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1151                     (adev->gfx.me_feature_version >= 42) &&
1152                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1153                     (adev->gfx.pfp_feature_version >= 42))
1154                         adev->gfx.me_fw_write_wait = true;
1155
1156                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1157                     (adev->gfx.mec_feature_version >= 42))
1158                         adev->gfx.mec_fw_write_wait = true;
1159                 break;
1160         default:
1161                 break;
1162         }
1163 }
1164
1165 struct amdgpu_gfxoff_quirk {
1166         u16 chip_vendor;
1167         u16 chip_device;
1168         u16 subsys_vendor;
1169         u16 subsys_device;
1170         u8 revision;
1171 };
1172
1173 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1174         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1175         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1176         { 0, 0, 0, 0, 0 },
1177 };
1178
1179 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1180 {
1181         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1182
1183         while (p && p->chip_device != 0) {
1184                 if (pdev->vendor == p->chip_vendor &&
1185                     pdev->device == p->chip_device &&
1186                     pdev->subsystem_vendor == p->subsys_vendor &&
1187                     pdev->subsystem_device == p->subsys_device &&
1188                     pdev->revision == p->revision) {
1189                         return true;
1190                 }
1191                 ++p;
1192         }
1193         return false;
1194 }
1195
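/* Raven boards whose SMU firmware is 0x41e2b or newer use the "kicker" RLC
 * firmware (see gfx_v9_0_init_rlc_microcode).
 */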
1196 static bool is_raven_kicker(struct amdgpu_device *adev)
1197 {
1198         return adev->pm.fw_version >= 0x41e2b;
1199 }
1203
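/* Disable GFXOFF on quirked boards and on Raven parts with RLC firmware
 * that is too old; where GFXOFF stays enabled on APUs, also enable the
 * GFX powergating flags.
 */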
1204 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1205 {
1206         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1207                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1208
1209         switch (adev->asic_type) {
1210         case CHIP_VEGA10:
1211         case CHIP_VEGA12:
1212         case CHIP_VEGA20:
1213                 break;
1214         case CHIP_RAVEN:
1215                 if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) &&
1216                     ((!is_raven_kicker(adev) &&
1217                       adev->gfx.rlc_fw_version < 531) ||
1218                      (adev->gfx.rlc_feature_version < 1) ||
1219                      !adev->gfx.rlc.is_rlc_v2_1))
1220                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1221
1222                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1223                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1224                                 AMD_PG_SUPPORT_CP |
1225                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1226                 break;
1227         case CHIP_RENOIR:
1228                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1229                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1230                                 AMD_PG_SUPPORT_CP |
1231                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1232                 break;
1233         default:
1234                 break;
1235         }
1236 }
1237
1238 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1239                                           const char *chip_name)
1240 {
1241         char fw_name[30];
1242         int err;
1243         struct amdgpu_firmware_info *info = NULL;
1244         const struct common_firmware_header *header = NULL;
1245         const struct gfx_firmware_header_v1_0 *cp_hdr;
1246
1247         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1248         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1249         if (err)
1250                 goto out;
1251         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1252         if (err)
1253                 goto out;
1254         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1255         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1256         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1257
1258         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1259         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1260         if (err)
1261                 goto out;
1262         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1263         if (err)
1264                 goto out;
1265         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1266         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1267         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1268
1269         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1270         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1271         if (err)
1272                 goto out;
1273         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1274         if (err)
1275                 goto out;
1276         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1277         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1278         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1279
1280         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1281                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1282                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1283                 info->fw = adev->gfx.pfp_fw;
1284                 header = (const struct common_firmware_header *)info->fw->data;
1285                 adev->firmware.fw_size +=
1286                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1287
1288                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1289                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1290                 info->fw = adev->gfx.me_fw;
1291                 header = (const struct common_firmware_header *)info->fw->data;
1292                 adev->firmware.fw_size +=
1293                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1294
1295                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1296                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1297                 info->fw = adev->gfx.ce_fw;
1298                 header = (const struct common_firmware_header *)info->fw->data;
1299                 adev->firmware.fw_size +=
1300                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1301         }
1302
1303 out:
1304         if (err) {
1305                 dev_err(adev->dev,
1306                         "gfx9: Failed to load firmware \"%s\"\n",
1307                         fw_name);
1308                 release_firmware(adev->gfx.pfp_fw);
1309                 adev->gfx.pfp_fw = NULL;
1310                 release_firmware(adev->gfx.me_fw);
1311                 adev->gfx.me_fw = NULL;
1312                 release_firmware(adev->gfx.ce_fw);
1313                 adev->gfx.ce_fw = NULL;
1314         }
1315         return err;
1316 }
1317
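/* Load and parse the RLC firmware (picking the picasso AM4 or raven kicker
 * variant when applicable), fill in the register save/restore lists and
 * register the blobs with the PSP loader when PSP loading is used.
 */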
1318 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1319                                           const char *chip_name)
1320 {
1321         char fw_name[30];
1322         int err;
1323         struct amdgpu_firmware_info *info = NULL;
1324         const struct common_firmware_header *header = NULL;
1325         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1326         unsigned int *tmp = NULL;
1327         unsigned int i = 0;
1328         uint16_t version_major;
1329         uint16_t version_minor;
1330         uint32_t smu_version;
1331
1332         /*
1333          * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1334          * instead of picasso_rlc.bin.
1335          * Detection:
1336          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1337          *          or revision >= 0xD8 && revision <= 0xDF
1338          * otherwise it is PCO FP5
1339          */
1340         if (!strcmp(chip_name, "picasso") &&
1341                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1342                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1343                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1344         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1345                 (smu_version >= 0x41e2b))
1346                 /*
1347                  * SMC is loaded by the SBIOS on APUs; the SMU version can be read directly.
1348                  */
1349                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1350         else
1351                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1352         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1353         if (err)
1354                 goto out;
1355         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
             if (err)
                     goto out;
1356         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1357
1358         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1359         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1360         if (version_major == 2 && version_minor == 1)
1361                 adev->gfx.rlc.is_rlc_v2_1 = true;
1362
1363         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1364         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1365         adev->gfx.rlc.save_and_restore_offset =
1366                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1367         adev->gfx.rlc.clear_state_descriptor_offset =
1368                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1369         adev->gfx.rlc.avail_scratch_ram_locations =
1370                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1371         adev->gfx.rlc.reg_restore_list_size =
1372                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1373         adev->gfx.rlc.reg_list_format_start =
1374                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1375         adev->gfx.rlc.reg_list_format_separate_start =
1376                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1377         adev->gfx.rlc.starting_offsets_start =
1378                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1379         adev->gfx.rlc.reg_list_format_size_bytes =
1380                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1381         adev->gfx.rlc.reg_list_size_bytes =
1382                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1383         adev->gfx.rlc.register_list_format =
1384                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1385                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1386         if (!adev->gfx.rlc.register_list_format) {
1387                 err = -ENOMEM;
1388                 goto out;
1389         }
1390
1391         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1392                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1393         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1394                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1395
1396         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1397
1398         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1399                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1400         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1401                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1402
1403         if (adev->gfx.rlc.is_rlc_v2_1)
1404                 gfx_v9_0_init_rlc_ext_microcode(adev);
1405
1406         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1407                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1408                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1409                 info->fw = adev->gfx.rlc_fw;
1410                 header = (const struct common_firmware_header *)info->fw->data;
1411                 adev->firmware.fw_size +=
1412                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1413
1414                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1415                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1416                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1417                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1418                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1419                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1420                         info->fw = adev->gfx.rlc_fw;
1421                         adev->firmware.fw_size +=
1422                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1423
1424                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1425                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1426                         info->fw = adev->gfx.rlc_fw;
1427                         adev->firmware.fw_size +=
1428                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1429
1430                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1431                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1432                         info->fw = adev->gfx.rlc_fw;
1433                         adev->firmware.fw_size +=
1434                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1435                 }
1436         }
1437
1438 out:
1439         if (err) {
1440                 dev_err(adev->dev,
1441                         "gfx9: Failed to load firmware \"%s\"\n",
1442                         fw_name);
1443                 release_firmware(adev->gfx.rlc_fw);
1444                 adev->gfx.rlc_fw = NULL;
1445         }
1446         return err;
1447 }
1448
1449 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1450                                           const char *chip_name)
1451 {
1452         char fw_name[30];
1453         int err;
1454         struct amdgpu_firmware_info *info = NULL;
1455         const struct common_firmware_header *header = NULL;
1456         const struct gfx_firmware_header_v1_0 *cp_hdr;
1457
1458         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1459         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1460         if (err)
1461                 goto out;
1462         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1463         if (err)
1464                 goto out;
1465         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1466         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1467         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1468
1470         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1471         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1472         if (!err) {
1473                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1474                 if (err)
1475                         goto out;
1476                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1477                         adev->gfx.mec2_fw->data;
1478                 adev->gfx.mec2_fw_version =
1479                         le32_to_cpu(cp_hdr->header.ucode_version);
1480                 adev->gfx.mec2_feature_version =
1481                         le32_to_cpu(cp_hdr->ucode_feature_version);
1482         } else {
1483                 err = 0;
1484                 adev->gfx.mec2_fw = NULL;
1485         }
1486
1487         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1488                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1489                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1490                 info->fw = adev->gfx.mec_fw;
1491                 header = (const struct common_firmware_header *)info->fw->data;
1492                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1493                 adev->firmware.fw_size +=
1494                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1495
1496                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1497                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1498                 info->fw = adev->gfx.mec_fw;
1499                 adev->firmware.fw_size +=
1500                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1501
1502                 if (adev->gfx.mec2_fw) {
1503                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1504                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1505                         info->fw = adev->gfx.mec2_fw;
1506                         header = (const struct common_firmware_header *)info->fw->data;
1507                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1508                         adev->firmware.fw_size +=
1509                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1510
1511                         /* TODO: Determine if MEC2 JT FW loading can be removed
1512                          * for all GFX v9 ASICs and above. */
1513                         if (adev->asic_type != CHIP_ARCTURUS &&
1514                             adev->asic_type != CHIP_RENOIR) {
1515                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1516                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1517                                 info->fw = adev->gfx.mec2_fw;
1518                                 adev->firmware.fw_size +=
1519                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1520                                         PAGE_SIZE);
1521                         }
1522                 }
1523         }
1524
1525 out:
1526         gfx_v9_0_check_if_need_gfxoff(adev);
1527         gfx_v9_0_check_fw_write_wait(adev);
1528         if (err) {
1529                 dev_err(adev->dev,
1530                         "gfx9: Failed to load firmware \"%s\"\n",
1531                         fw_name);
1532                 release_firmware(adev->gfx.mec_fw);
1533                 adev->gfx.mec_fw = NULL;
1534                 release_firmware(adev->gfx.mec2_fw);
1535                 adev->gfx.mec2_fw = NULL;
1536         }
1537         return err;
1538 }
1539
1540 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1541 {
1542         const char *chip_name;
1543         int r;
1544
1545         DRM_DEBUG("\n");
1546
1547         switch (adev->asic_type) {
1548         case CHIP_VEGA10:
1549                 chip_name = "vega10";
1550                 break;
1551         case CHIP_VEGA12:
1552                 chip_name = "vega12";
1553                 break;
1554         case CHIP_VEGA20:
1555                 chip_name = "vega20";
1556                 break;
1557         case CHIP_RAVEN:
1558                 if (adev->rev_id >= 8)
1559                         chip_name = "raven2";
1560                 else if (adev->pdev->device == 0x15d8)
1561                         chip_name = "picasso";
1562                 else
1563                         chip_name = "raven";
1564                 break;
1565         case CHIP_ARCTURUS:
1566                 chip_name = "arcturus";
1567                 break;
1568         case CHIP_RENOIR:
1569                 chip_name = "renoir";
1570                 break;
1571         default:
1572                 BUG();
1573         }
1574
1575         /* No CPG in Arcturus */
1576         if (adev->asic_type != CHIP_ARCTURUS) {
1577                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1578                 if (r)
1579                         return r;
1580         }
1581
1582         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1583         if (r)
1584                 return r;
1585
1586         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1587         if (r)
1588                 return r;
1589
1590         return 0;
1591 }
1592
1593 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1594 {
1595         u32 count = 0;
1596         const struct cs_section_def *sect = NULL;
1597         const struct cs_extent_def *ext = NULL;
1598
1599         /* begin clear state */
1600         count += 2;
1601         /* context control state */
1602         count += 3;
1603
1604         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1605                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1606                         if (sect->id == SECT_CONTEXT)
1607                                 count += 2 + ext->reg_count;
1608                         else
1609                                 return 0;
1610                 }
1611         }
1612
1613         /* end clear state */
1614         count += 2;
1615         /* clear state */
1616         count += 2;
1617
1618         return count;
1619 }
1620
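/* Build the clear-state indirect buffer in @buffer from the rlc cs_data
 * sections (SECT_CONTEXT registers only).
 */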
1621 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1622                                     volatile u32 *buffer)
1623 {
1624         u32 count = 0, i;
1625         const struct cs_section_def *sect = NULL;
1626         const struct cs_extent_def *ext = NULL;
1627
1628         if (adev->gfx.rlc.cs_data == NULL)
1629                 return;
1630         if (buffer == NULL)
1631                 return;
1632
1633         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1634         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1635
1636         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1637         buffer[count++] = cpu_to_le32(0x80000000);
1638         buffer[count++] = cpu_to_le32(0x80000000);
1639
1640         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1641                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1642                         if (sect->id == SECT_CONTEXT) {
1643                                 buffer[count++] =
1644                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1645                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1646                                                 PACKET3_SET_CONTEXT_REG_START);
1647                                 for (i = 0; i < ext->reg_count; i++)
1648                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1649                         } else {
1650                                 return;
1651                         }
1652                 }
1653         }
1654
1655         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1656         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1657
1658         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1659         buffer[count++] = cpu_to_le32(0);
1660 }
1661
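/* For each SE/SH, collect the first always_on_cu_num active CUs into the
 * RLC always-active CU mask; the first pg_always_on_cu_num of those also
 * form the powergating always-on mask.
 */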
1662 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1663 {
1664         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1665         uint32_t pg_always_on_cu_num = 2;
1666         uint32_t always_on_cu_num;
1667         uint32_t i, j, k;
1668         uint32_t mask, cu_bitmap, counter;
1669
1670         if (adev->flags & AMD_IS_APU)
1671                 always_on_cu_num = 4;
1672         else if (adev->asic_type == CHIP_VEGA12)
1673                 always_on_cu_num = 8;
1674         else
1675                 always_on_cu_num = 12;
1676
1677         mutex_lock(&adev->grbm_idx_mutex);
1678         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1679                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1680                         mask = 1;
1681                         cu_bitmap = 0;
1682                         counter = 0;
1683                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1684
1685                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1686                                 if (cu_info->bitmap[i][j] & mask) {
1687                                         if (counter == pg_always_on_cu_num)
1688                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1689                                         if (counter < always_on_cu_num)
1690                                                 cu_bitmap |= mask;
1691                                         else
1692                                                 break;
1693                                         counter++;
1694                                 }
1695                                 mask <<= 1;
1696                         }
1697
1698                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1699                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1700                 }
1701         }
1702         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1703         mutex_unlock(&adev->grbm_idx_mutex);
1704 }
1705
1706 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1707 {
1708         uint32_t data;
1709
1710         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1711         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1712         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1713         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1714         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1715
1716         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1717         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1718
1719         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1720         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1721
1722         mutex_lock(&adev->grbm_idx_mutex);
1723         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1724         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1725         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1726
1727         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1728         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1729         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1730         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1731         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1732
1733         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1734         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1735         data &= 0x0000FFFF;
1736         data |= 0x00C00000;
1737         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1738
1739         /*
1740          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1741          * programmed in gfx_v9_0_init_always_on_cu_mask()
1742          */
1743
1744         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1745          * but used for RLC_LB_CNTL configuration */
1746         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1747         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1748         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1749         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1750         mutex_unlock(&adev->grbm_idx_mutex);
1751
1752         gfx_v9_0_init_always_on_cu_mask(adev);
1753 }
1754
1755 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1756 {
1757         uint32_t data;
1758
1759         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1760         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1761         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1762         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1763         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1764
1765         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1766         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1767
1768         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1769         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1770
1771         mutex_lock(&adev->grbm_idx_mutex);
1772         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1773         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1774         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1775
1776         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1777         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1778         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1779         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1780         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1781
1782         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1783         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1784         data &= 0x0000FFFF;
1785         data |= 0x00C00000;
1786         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1787
1788         /*
1789          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1790          * programmed in gfx_v9_0_init_always_on_cu_mask()
1791          */
1792
1793         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1794          * but used for RLC_LB_CNTL configuration */
1795         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1796         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1797         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1798         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1799         mutex_unlock(&adev->grbm_idx_mutex);
1800
1801         gfx_v9_0_init_always_on_cu_mask(adev);
1802 }
1803
1804 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1805 {
1806         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1807 }
1808
1809 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1810 {
1811         return 5;
1812 }
1813
1814 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1815 {
1816         const struct cs_section_def *cs_data;
1817         int r;
1818
1819         adev->gfx.rlc.cs_data = gfx9_cs_data;
1820
1821         cs_data = adev->gfx.rlc.cs_data;
1822
1823         if (cs_data) {
1824                 /* init clear state block */
1825                 r = amdgpu_gfx_rlc_init_csb(adev);
1826                 if (r)
1827                         return r;
1828         }
1829
1830         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1831                 /* TODO: double check the cp_table_size for RV */
1832                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1833                 r = amdgpu_gfx_rlc_init_cpt(adev);
1834                 if (r)
1835                         return r;
1836         }
1837
1838         switch (adev->asic_type) {
1839         case CHIP_RAVEN:
1840                 gfx_v9_0_init_lbpw(adev);
1841                 break;
1842         case CHIP_VEGA20:
1843                 gfx_v9_4_init_lbpw(adev);
1844                 break;
1845         default:
1846                 break;
1847         }
1848
1849         return 0;
1850 }
1851
1852 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1853 {
1854         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1855         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1856 }
1857
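/* Take ownership of the compute queues, allocate the MEC HPD EOP buffer in
 * VRAM and copy the MEC microcode into a GTT buffer object.
 */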
1858 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1859 {
1860         int r;
1861         u32 *hpd;
1862         const __le32 *fw_data;
1863         unsigned fw_size;
1864         u32 *fw;
1865         size_t mec_hpd_size;
1866
1867         const struct gfx_firmware_header_v1_0 *mec_hdr;
1868
1869         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1870
1871         /* take ownership of the relevant compute queues */
1872         amdgpu_gfx_compute_queue_acquire(adev);
1873         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1874
1875         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1876                                       AMDGPU_GEM_DOMAIN_VRAM,
1877                                       &adev->gfx.mec.hpd_eop_obj,
1878                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1879                                       (void **)&hpd);
1880         if (r) {
1881                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1882                 gfx_v9_0_mec_fini(adev);
1883                 return r;
1884         }
1885
1886         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1887
1888         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1889         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1890
1891         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1892
1893         fw_data = (const __le32 *)
1894                 (adev->gfx.mec_fw->data +
1895                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1896         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1897
1898         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1899                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1900                                       &adev->gfx.mec.mec_fw_obj,
1901                                       &adev->gfx.mec.mec_fw_gpu_addr,
1902                                       (void **)&fw);
1903         if (r) {
1904                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1905                 gfx_v9_0_mec_fini(adev);
1906                 return r;
1907         }
1908
1909         memcpy(fw, fw_data, fw_size);
1910
1911         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1912         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1913
1914         return 0;
1915 }
1916
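/* Read one dword from the SQ indirect register space for the given
 * SIMD/wave.
 */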
1917 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1918 {
1919         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1920                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1921                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1922                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1923                 (SQ_IND_INDEX__FORCE_READ_MASK));
1924         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1925 }
1926
1927 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1928                            uint32_t wave, uint32_t thread,
1929                            uint32_t regno, uint32_t num, uint32_t *out)
1930 {
1931         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1932                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1933                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1934                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1935                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1936                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1937                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1938         while (num--)
1939                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1940 }
1941
1942 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1943 {
1944         /* type 1 wave data */
1945         dst[(*no_fields)++] = 1;
1946         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1947         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1948         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1949         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1950         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1951         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1952         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1953         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1954         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1955         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1956         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1957         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1958         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1959         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1960 }
1961
1962 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1963                                      uint32_t wave, uint32_t start,
1964                                      uint32_t size, uint32_t *dst)
1965 {
1966         wave_read_regs(
1967                 adev, simd, wave, 0,
1968                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1969 }
1970
1971 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1972                                      uint32_t wave, uint32_t thread,
1973                                      uint32_t start, uint32_t size,
1974                                      uint32_t *dst)
1975 {
1976         wave_read_regs(
1977                 adev, simd, wave, thread,
1978                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1979 }
1980
1981 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1982                                   u32 me, u32 pipe, u32 q, u32 vm)
1983 {
1984         soc15_grbm_select(adev, me, pipe, q, vm);
1985 }
1986
1987 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1988         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1989         .select_se_sh = &gfx_v9_0_select_se_sh,
1990         .read_wave_data = &gfx_v9_0_read_wave_data,
1991         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1992         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1993         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1994         .ras_error_inject = &gfx_v9_0_ras_error_inject,
1995         .query_ras_error_count = &gfx_v9_0_query_ras_error_count
1996 };
1997
1998 static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = {
1999         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2000         .select_se_sh = &gfx_v9_0_select_se_sh,
2001         .read_wave_data = &gfx_v9_0_read_wave_data,
2002         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2003         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2004         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2005         .ras_error_inject = &gfx_v9_4_ras_error_inject,
2006         .query_ras_error_count = &gfx_v9_4_query_ras_error_count
2007 };
2008
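/* Set the per-ASIC gfx configuration (FIFO sizes, GB_ADDR_CONFIG) and
 * derive the gb_addr_config_fields helper values from GB_ADDR_CONFIG.
 */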
2009 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2010 {
2011         u32 gb_addr_config;
2012         int err;
2013
2014         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2015
2016         switch (adev->asic_type) {
2017         case CHIP_VEGA10:
2018                 adev->gfx.config.max_hw_contexts = 8;
2019                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2020                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2021                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2022                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2023                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2024                 break;
2025         case CHIP_VEGA12:
2026                 adev->gfx.config.max_hw_contexts = 8;
2027                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2028                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2029                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2030                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2031                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2032                 DRM_INFO("fix gfx.config for vega12\n");
2033                 break;
2034         case CHIP_VEGA20:
2035                 adev->gfx.config.max_hw_contexts = 8;
2036                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2037                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2038                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2039                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2040                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2041                 gb_addr_config &= ~0xf3e777ff;
2042                 gb_addr_config |= 0x22014042;
2043                 /* check vbios table if gpu info is not available */
2044                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2045                 if (err)
2046                         return err;
2047                 break;
2048         case CHIP_RAVEN:
2049                 adev->gfx.config.max_hw_contexts = 8;
2050                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2051                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2052                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2053                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2054                 if (adev->rev_id >= 8)
2055                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2056                 else
2057                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2058                 break;
2059         case CHIP_ARCTURUS:
2060                 adev->gfx.funcs = &gfx_v9_4_gfx_funcs;
2061                 adev->gfx.config.max_hw_contexts = 8;
2062                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2063                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2064                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2065                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2066                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2067                 gb_addr_config &= ~0xf3e777ff;
2068                 gb_addr_config |= 0x22014042;
2069                 break;
2070         case CHIP_RENOIR:
2071                 adev->gfx.config.max_hw_contexts = 8;
2072                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2073                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2074                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2075                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2076                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2077                 gb_addr_config &= ~0xf3e777ff;
2078                 gb_addr_config |= 0x22010042;
2079                 break;
2080         default:
2081                 BUG();
2082                 break;
2083         }
2084
2085         adev->gfx.config.gb_addr_config = gb_addr_config;
2086
2087         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2088                         REG_GET_FIELD(
2089                                         adev->gfx.config.gb_addr_config,
2090                                         GB_ADDR_CONFIG,
2091                                         NUM_PIPES);
2092
2093         adev->gfx.config.max_tile_pipes =
2094                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2095
2096         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2097                         REG_GET_FIELD(
2098                                         adev->gfx.config.gb_addr_config,
2099                                         GB_ADDR_CONFIG,
2100                                         NUM_BANKS);
2101         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2102                         REG_GET_FIELD(
2103                                         adev->gfx.config.gb_addr_config,
2104                                         GB_ADDR_CONFIG,
2105                                         MAX_COMPRESSED_FRAGS);
2106         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2107                         REG_GET_FIELD(
2108                                         adev->gfx.config.gb_addr_config,
2109                                         GB_ADDR_CONFIG,
2110                                         NUM_RB_PER_SE);
2111         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2112                         REG_GET_FIELD(
2113                                         adev->gfx.config.gb_addr_config,
2114                                         GB_ADDR_CONFIG,
2115                                         NUM_SHADER_ENGINES);
2116         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2117                         REG_GET_FIELD(
2118                                         adev->gfx.config.gb_addr_config,
2119                                         GB_ADDR_CONFIG,
2120                                         PIPE_INTERLEAVE_SIZE));
2121
2122         return 0;
2123 }
2124
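/* Set up one compute ring: map it to its MEC/pipe/queue, assign the
 * doorbell and EOP address, and attach it to the matching EOP interrupt.
 */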
2125 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2126                                       int mec, int pipe, int queue)
2127 {
2128         int r;
2129         unsigned irq_type;
2130         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2131
2133
2134         /* mec0 is me1 */
2135         ring->me = mec + 1;
2136         ring->pipe = pipe;
2137         ring->queue = queue;
2138
2139         ring->ring_obj = NULL;
2140         ring->use_doorbell = true;
2141         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2142         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2143                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2144         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2145
2146         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2147                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2148                 + ring->pipe;
2149
2150         /* type-2 packets are deprecated on MEC, use type-3 instead */
2151         r = amdgpu_ring_init(adev, ring, 1024,
2152                              &adev->gfx.eop_irq, irq_type);
2153         if (r)
2154                 return r;
2155
2157         return 0;
2158 }
2159
2160 static int gfx_v9_0_sw_init(void *handle)
2161 {
2162         int i, j, k, r, ring_id;
2163         struct amdgpu_ring *ring;
2164         struct amdgpu_kiq *kiq;
2165         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2166
2167         switch (adev->asic_type) {
2168         case CHIP_VEGA10:
2169         case CHIP_VEGA12:
2170         case CHIP_VEGA20:
2171         case CHIP_RAVEN:
2172         case CHIP_ARCTURUS:
2173         case CHIP_RENOIR:
2174                 adev->gfx.mec.num_mec = 2;
2175                 break;
2176         default:
2177                 adev->gfx.mec.num_mec = 1;
2178                 break;
2179         }
2180
2181         adev->gfx.mec.num_pipe_per_mec = 4;
2182         adev->gfx.mec.num_queue_per_pipe = 8;
2183
2184         /* EOP Event */
2185         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2186         if (r)
2187                 return r;
2188
2189         /* Privileged reg */
2190         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2191                               &adev->gfx.priv_reg_irq);
2192         if (r)
2193                 return r;
2194
2195         /* Privileged inst */
2196         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2197                               &adev->gfx.priv_inst_irq);
2198         if (r)
2199                 return r;
2200
2201         /* ECC error */
2202         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2203                               &adev->gfx.cp_ecc_error_irq);
2204         if (r)
2205                 return r;
2206
2207         /* FUE error */
2208         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2209                               &adev->gfx.cp_ecc_error_irq);
2210         if (r)
2211                 return r;
2212
2213         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2214
2215         gfx_v9_0_scratch_init(adev);
2216
2217         r = gfx_v9_0_init_microcode(adev);
2218         if (r) {
2219                 DRM_ERROR("Failed to load gfx firmware!\n");
2220                 return r;
2221         }
2222
2223         r = adev->gfx.rlc.funcs->init(adev);
2224         if (r) {
2225                 DRM_ERROR("Failed to init rlc BOs!\n");
2226                 return r;
2227         }
2228
2229         r = gfx_v9_0_mec_init(adev);
2230         if (r) {
2231                 DRM_ERROR("Failed to init MEC BOs!\n");
2232                 return r;
2233         }
2234
2235         /* set up the gfx ring */
2236         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2237                 ring = &adev->gfx.gfx_ring[i];
2238                 ring->ring_obj = NULL;
2239                 if (!i)
2240                         sprintf(ring->name, "gfx");
2241                 else
2242                         sprintf(ring->name, "gfx_%d", i);
2243                 ring->use_doorbell = true;
2244                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2245                 r = amdgpu_ring_init(adev, ring, 1024,
2246                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2247                 if (r)
2248                         return r;
2249         }
2250
2251         /* set up the compute queues - allocate horizontally across pipes */
2252         ring_id = 0;
2253         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2254                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2255                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2256                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2257                                         continue;
2258
2259                                 r = gfx_v9_0_compute_ring_init(adev,
2260                                                                ring_id,
2261                                                                i, k, j);
2262                                 if (r)
2263                                         return r;
2264
2265                                 ring_id++;
2266                         }
2267                 }
2268         }
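             /*
              * Because the queue index (j) is the outer loop and the pipe
              * index (k) the inner one, consecutive ring_ids land on
              * different pipes first; this is the "horizontal" allocation
              * mentioned above.
              */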
2269
2270         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2271         if (r) {
2272                 DRM_ERROR("Failed to init KIQ BOs!\n");
2273                 return r;
2274         }
2275
2276         kiq = &adev->gfx.kiq;
2277         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2278         if (r)
2279                 return r;
2280
2281         /* create MQD for all compute queues as well as KIQ for the SRIOV case */
2282         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2283         if (r)
2284                 return r;
2285
2286         adev->gfx.ce_ram_size = 0x8000;
2287
2288         r = gfx_v9_0_gpu_early_init(adev);
2289         if (r)
2290                 return r;
2291
2292         return 0;
2293 }
2294
2295
2296 static int gfx_v9_0_sw_fini(void *handle)
2297 {
2298         int i;
2299         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2300
2301         amdgpu_gfx_ras_fini(adev);
2302
2303         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2304                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2305         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2306                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2307
2308         amdgpu_gfx_mqd_sw_fini(adev);
2309         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2310         amdgpu_gfx_kiq_fini(adev);
2311
2312         gfx_v9_0_mec_fini(adev);
2313         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2314         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2315                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2316                                 &adev->gfx.rlc.cp_table_gpu_addr,
2317                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2318         }
2319         gfx_v9_0_free_microcode(adev);
2320
2321         return 0;
2322 }
2323
2324
2325 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2326 {
2327         /* TODO */
2328 }
2329
2330 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2331 {
2332         u32 data;
2333
2334         if (instance == 0xffffffff)
2335                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2336         else
2337                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2338
2339         if (se_num == 0xffffffff)
2340                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2341         else
2342                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2343
2344         if (sh_num == 0xffffffff)
2345                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2346         else
2347                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2348
2349         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2350 }
2351
2352 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2353 {
2354         u32 data, mask;
2355
2356         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2357         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2358
2359         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2360         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2361
2362         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2363                                          adev->gfx.config.max_sh_per_se);
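             /*
              * Example (assuming 4 render backends per SE and one SH per SE):
              * mask = 0xF, so a raw disable value of 0x2 yields an active
              * bitmap of (~0x2) & 0xF = 0xD, i.e. RBs 0, 2 and 3 enabled.
              */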
2364
2365         return (~data) & mask;
2366 }
2367
2368 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2369 {
2370         int i, j;
2371         u32 data;
2372         u32 active_rbs = 0;
2373         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2374                                         adev->gfx.config.max_sh_per_se;
2375
2376         mutex_lock(&adev->grbm_idx_mutex);
2377         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2378                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2379                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2380                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2381                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2382                                                rb_bitmap_width_per_sh);
2383                 }
2384         }
2385         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2386         mutex_unlock(&adev->grbm_idx_mutex);
2387
2388         adev->gfx.config.backend_enable_mask = active_rbs;
2389         adev->gfx.config.num_rbs = hweight32(active_rbs);
2390 }
2391
2392 #define DEFAULT_SH_MEM_BASES    (0x6000)
2393 #define FIRST_COMPUTE_VMID      (8)
2394 #define LAST_COMPUTE_VMID       (16)
2395 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2396 {
2397         int i;
2398         uint32_t sh_mem_config;
2399         uint32_t sh_mem_bases;
2400
2401         /*
2402          * Configure apertures:
2403          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2404          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2405          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2406          */
2407         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
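             /*
              * DEFAULT_SH_MEM_BASES holds only the top 16 bits of the 64-bit
              * aperture base, so 0x6000 corresponds to 0x6000'0000'00000000
              * in the layout above. Packing it into both halves of the
              * register sets the private base (low half) and the shared base
              * (high half) to the same value.
              */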
2408
2409         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2410                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2411                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2412
2413         mutex_lock(&adev->srbm_mutex);
2414         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2415                 soc15_grbm_select(adev, 0, 0, 0, i);
2416                 /* CP and shaders */
2417                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2418                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2419         }
2420         soc15_grbm_select(adev, 0, 0, 0, 0);
2421         mutex_unlock(&adev->srbm_mutex);
2422
2423         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2424          * access. These should be enabled by FW for target VMIDs. */
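             /*
              * GDS_VMID0_BASE/_SIZE appear to be laid out as per-VMID pairs,
              * hence the 2 * i register offset below, while the GWS and OA
              * registers use a stride of one register per VMID.
              */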
2425         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2426                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2427                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2428                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2429                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2430         }
2431 }
2432
2433 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2434 {
2435         int vmid;
2436
2437         /*
2438          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2439          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2440          * the driver can enable them for graphics. VMID0 should maintain
2441          * access so that HWS firmware can save/restore entries.
2442          */
2443         for (vmid = 1; vmid < 16; vmid++) {
2444                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2445                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2446                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2447                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2448         }
2449 }
2450
2451 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2452 {
2453         uint32_t tmp;
2454
2455         switch (adev->asic_type) {
2456         case CHIP_ARCTURUS:
2457                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2458                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2459                                         DISABLE_BARRIER_WAITCNT, 1);
2460                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2461                 break;
2462         default:
2463                 break;
2464         }
2465 }
2466
2467 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2468 {
2469         u32 tmp;
2470         int i;
2471
2472         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2473
2474         gfx_v9_0_tiling_mode_table_init(adev);
2475
2476         gfx_v9_0_setup_rb(adev);
2477         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2478         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2479
2480         /* XXX SH_MEM regs */
2481         /* where to put LDS, scratch, GPUVM in FSA64 space */
2482         mutex_lock(&adev->srbm_mutex);
2483         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2484                 soc15_grbm_select(adev, 0, 0, 0, i);
2485                 /* CP and shaders */
2486                 if (i == 0) {
2487                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2488                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2489                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2490                                             !!amdgpu_noretry);
2491                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2492                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2493                 } else {
2494                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2495                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2496                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2497                                             !!amdgpu_noretry);
2498                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2499                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2500                                 (adev->gmc.private_aperture_start >> 48));
2501                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2502                                 (adev->gmc.shared_aperture_start >> 48));
2503                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
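                             /*
                              * SH_MEM_BASES only holds the top 16 bits of each
                              * 64-bit aperture address, hence the >> 48 above.
                              */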
2504                 }
2505         }
2506         soc15_grbm_select(adev, 0, 0, 0, 0);
2507
2508         mutex_unlock(&adev->srbm_mutex);
2509
2510         gfx_v9_0_init_compute_vmid(adev);
2511         gfx_v9_0_init_gds_vmid(adev);
2512         gfx_v9_0_init_sq_config(adev);
2513 }
2514
2515 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2516 {
2517         u32 i, j, k;
2518         u32 mask;
2519
2520         mutex_lock(&adev->grbm_idx_mutex);
2521         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2522                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2523                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2524                         for (k = 0; k < adev->usec_timeout; k++) {
2525                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2526                                         break;
2527                                 udelay(1);
2528                         }
2529                         if (k == adev->usec_timeout) {
2530                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2531                                                       0xffffffff, 0xffffffff);
2532                                 mutex_unlock(&adev->grbm_idx_mutex);
2533                                 DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
2534                                          i, j);
2535                                 return;
2536                         }
2537                 }
2538         }
2539         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2540         mutex_unlock(&adev->grbm_idx_mutex);
2541
2542         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2543                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2544                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2545                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2546         for (k = 0; k < adev->usec_timeout; k++) {
2547                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2548                         break;
2549                 udelay(1);
2550         }
2551 }
2552
2553 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2554                                                bool enable)
2555 {
2556         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2557
2558         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2559         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2560         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2561         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2562
2563         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2564 }
2565
2566 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2567 {
2568         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2569         /* csib */
2570         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2571                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2572         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2573                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2574         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2575                         adev->gfx.rlc.clear_state_size);
2576 }
2577
2578 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2579                                 int indirect_offset,
2580                                 int list_size,
2581                                 int *unique_indirect_regs,
2582                                 int unique_indirect_reg_count,
2583                                 int *indirect_start_offsets,
2584                                 int *indirect_start_offsets_count,
2585                                 int max_start_offsets_count)
2586 {
2587         int idx;
2588
2589         for (; indirect_offset < list_size; indirect_offset++) {
2590                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2591                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2592                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2593
2594                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2595                         indirect_offset += 2;
2596
2597                         /* look for the matching index */
2598                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2599                                 if (unique_indirect_regs[idx] ==
2600                                         register_list_format[indirect_offset] ||
2601                                         !unique_indirect_regs[idx])
2602                                         break;
2603                         }
2604
2605                         BUG_ON(idx >= unique_indirect_reg_count);
2606
2607                         if (!unique_indirect_regs[idx])
2608                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2609
2610                         indirect_offset++;
2611                 }
2612         }
2613 }
2614
2615 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2616 {
2617         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2618         int unique_indirect_reg_count = 0;
2619
2620         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2621         int indirect_start_offsets_count = 0;
2622
2623         int list_size = 0;
2624         int i = 0, j = 0;
2625         u32 tmp = 0;
2626
2627         u32 *register_list_format =
2628                 kmemdup(adev->gfx.rlc.register_list_format,
2629                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2630         if (!register_list_format)
2631                 return -ENOMEM;
2632
2633         /* setup unique_indirect_regs array and indirect_start_offsets array */
2634         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2635         gfx_v9_1_parse_ind_reg_list(register_list_format,
2636                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2637                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2638                                     unique_indirect_regs,
2639                                     unique_indirect_reg_count,
2640                                     indirect_start_offsets,
2641                                     &indirect_start_offsets_count,
2642                                     ARRAY_SIZE(indirect_start_offsets));
2643
2644         /* enable auto inc in case it is disabled */
2645         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2646         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2647         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2648
2649         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2650         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2651                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2652         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2653                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2654                         adev->gfx.rlc.register_restore[i]);
2655
2656         /* load indirect register */
2657         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2658                 adev->gfx.rlc.reg_list_format_start);
2659
2660         /* direct register portion */
2661         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2662                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2663                         register_list_format[i]);
2664
2665         /* indirect register portion */
2666         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2667                 if (register_list_format[i] == 0xFFFFFFFF) {
2668                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2669                         continue;
2670                 }
2671
2672                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2673                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2674
2675                 for (j = 0; j < unique_indirect_reg_count; j++) {
2676                         if (register_list_format[i] == unique_indirect_regs[j]) {
2677                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2678                                 break;
2679                         }
2680                 }
2681
2682                 BUG_ON(j >= unique_indirect_reg_count);
2683
2684                 i++;
2685         }
2686
2687         /* set save/restore list size */
2688         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2689         list_size = list_size >> 1;
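             /*
              * The save/restore entries are presumably stored as offset/value
              * pairs, so the count written to the RLC is half the number of
              * dwords in the list (an assumption based on the halving above).
              */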
2690         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2691                 adev->gfx.rlc.reg_restore_list_size);
2692         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2693
2694         /* write the starting offsets to RLC scratch ram */
2695         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2696                 adev->gfx.rlc.starting_offsets_start);
2697         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2698                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2699                        indirect_start_offsets[i]);
2700
2701         /* load unique indirect regs*/
2702         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2703                 if (unique_indirect_regs[i] != 0) {
2704                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2705                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2706                                unique_indirect_regs[i] & 0x3FFFF);
2707
2708                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2709                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2710                                unique_indirect_regs[i] >> 20);
2711                 }
2712         }
2713
2714         kfree(register_list_format);
2715         return 0;
2716 }
2717
2718 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2719 {
2720         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2721 }
2722
2723 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2724                                              bool enable)
2725 {
2726         uint32_t data = 0;
2727         uint32_t default_data = 0;
2728
2729         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2730         if (enable) {
2731                 /* enable GFXIP control over CGPG */
2732                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2733                 if (default_data != data)
2734                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2735
2736                 /* update status */
2737                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2738                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2739                 if (default_data != data)
2740                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2741         } else {
2742                 /* restore GFXIP control over CGPG */
2743                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2744                 if (default_data != data)
2745                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2746         }
2747 }
2748
2749 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2750 {
2751         uint32_t data = 0;
2752
2753         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2754                               AMD_PG_SUPPORT_GFX_SMG |
2755                               AMD_PG_SUPPORT_GFX_DMG)) {
2756                 /* init IDLE_POLL_COUNT = 60 */
2757                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2758                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2759                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2760                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2761
2762                 /* init RLC PG Delay */
2763                 data = 0;
2764                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2765                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2766                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2767                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2768                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2769
2770                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2771                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2772                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2773                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2774
2775                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2776                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2777                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2778                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2779
2780                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2781                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2782
2783                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2784                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2785                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2786
2787                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2788         }
2789 }
2790
2791 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2792                                                 bool enable)
2793 {
2794         uint32_t data = 0;
2795         uint32_t default_data = 0;
2796
2797         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2798         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2799                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2800                              enable ? 1 : 0);
2801         if (default_data != data)
2802                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2803 }
2804
2805 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2806                                                 bool enable)
2807 {
2808         uint32_t data = 0;
2809         uint32_t default_data = 0;
2810
2811         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2812         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2813                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2814                              enable ? 1 : 0);
2815         if (default_data != data)
2816                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2817 }
2818
2819 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2820                                         bool enable)
2821 {
2822         uint32_t data = 0;
2823         uint32_t default_data = 0;
2824
2825         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2826         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2827                              CP_PG_DISABLE,
2828                              enable ? 0 : 1);
2829         if (default_data != data)
2830                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2831 }
2832
2833 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2834                                                 bool enable)
2835 {
2836         uint32_t data, default_data;
2837
2838         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2839         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2840                              GFX_POWER_GATING_ENABLE,
2841                              enable ? 1 : 0);
2842         if (default_data != data)
2843                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2844 }
2845
2846 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2847                                                 bool enable)
2848 {
2849         uint32_t data, default_data;
2850
2851         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2852         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2853                              GFX_PIPELINE_PG_ENABLE,
2854                              enable ? 1 : 0);
2855         if (default_data != data)
2856                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2857
2858         if (!enable)
2859                 /* read any GFX register to wake up GFX */
2860                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2861 }
2862
2863 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2864                                                        bool enable)
2865 {
2866         uint32_t data, default_data;
2867
2868         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2869         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2870                              STATIC_PER_CU_PG_ENABLE,
2871                              enable ? 1 : 0);
2872         if (default_data != data)
2873                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2874 }
2875
2876 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2877                                                 bool enable)
2878 {
2879         uint32_t data, default_data;
2880
2881         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2882         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2883                              DYN_PER_CU_PG_ENABLE,
2884                              enable ? 1 : 0);
2885         if (default_data != data)
2886                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2887 }
2888
2889 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2890 {
2891         gfx_v9_0_init_csb(adev);
2892
2893         /*
2894          * The RLC save/restore list is supported since RLC v2_1,
2895          * and it is required by the gfxoff feature.
2896          */
2897         if (adev->gfx.rlc.is_rlc_v2_1) {
2898                 if (adev->asic_type == CHIP_VEGA12 ||
2899                     (adev->asic_type == CHIP_RAVEN &&
2900                      adev->rev_id >= 8))
2901                         gfx_v9_1_init_rlc_save_restore_list(adev);
2902                 gfx_v9_0_enable_save_restore_machine(adev);
2903         }
2904
2905         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2906                               AMD_PG_SUPPORT_GFX_SMG |
2907                               AMD_PG_SUPPORT_GFX_DMG |
2908                               AMD_PG_SUPPORT_CP |
2909                               AMD_PG_SUPPORT_GDS |
2910                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2911                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2912                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2913                 gfx_v9_0_init_gfx_power_gating(adev);
2914         }
2915 }
2916
2917 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2918 {
2919         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2920         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2921         gfx_v9_0_wait_for_rlc_serdes(adev);
2922 }
2923
2924 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2925 {
2926         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2927         udelay(50);
2928         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2929         udelay(50);
2930 }
2931
2932 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2933 {
2934 #ifdef AMDGPU_RLC_DEBUG_RETRY
2935         u32 rlc_ucode_ver;
2936 #endif
2937
2938         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2939         udelay(50);
2940
2941         /* on APUs the CP interrupt is enabled only after the CP has been initialized, so skip it here */
2942         if (!(adev->flags & AMD_IS_APU)) {
2943                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2944                 udelay(50);
2945         }
2946
2947 #ifdef AMDGPU_RLC_DEBUG_RETRY
2948         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2949         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2950         if (rlc_ucode_ver == 0x108) {
2951                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2952                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2953                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2954                  * default is 0x9C4 to create a 100us interval */
2955                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2956                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2957                  * to disable the page fault retry interrupts, default is
2958                  * 0x100 (256) */
2959                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2960         }
2961 #endif
2962 }
2963
2964 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2965 {
2966         const struct rlc_firmware_header_v2_0 *hdr;
2967         const __le32 *fw_data;
2968         unsigned i, fw_size;
2969
2970         if (!adev->gfx.rlc_fw)
2971                 return -EINVAL;
2972
2973         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2974         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2975
2976         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2977                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2978         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2979
2980         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2981                         RLCG_UCODE_LOADING_START_ADDRESS);
2982         for (i = 0; i < fw_size; i++)
2983                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2984         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2985
2986         return 0;
2987 }
2988
2989 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2990 {
2991         int r;
2992
2993         if (amdgpu_sriov_vf(adev)) {
2994                 gfx_v9_0_init_csb(adev);
2995                 return 0;
2996         }
2997
2998         adev->gfx.rlc.funcs->stop(adev);
2999
3000         /* disable CG */
3001         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3002
3003         gfx_v9_0_init_pg(adev);
3004
3005         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3006                 /* legacy rlc firmware loading */
3007                 r = gfx_v9_0_rlc_load_microcode(adev);
3008                 if (r)
3009                         return r;
3010         }
3011
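             /*
              * Load Balancing Per Watt (LBPW) is controlled by the amdgpu_lbpw
              * module parameter; the default is assumed to be -1 (auto), with
              * 0 forcing it off and a positive value forcing it on, which
              * explains the asymmetric checks below (Raven enables it unless
              * explicitly disabled, Vega20 only when explicitly requested).
              */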
3012         switch (adev->asic_type) {
3013         case CHIP_RAVEN:
3014                 if (amdgpu_lbpw == 0)
3015                         gfx_v9_0_enable_lbpw(adev, false);
3016                 else
3017                         gfx_v9_0_enable_lbpw(adev, true);
3018                 break;
3019         case CHIP_VEGA20:
3020                 if (amdgpu_lbpw > 0)
3021                         gfx_v9_0_enable_lbpw(adev, true);
3022                 else
3023                         gfx_v9_0_enable_lbpw(adev, false);
3024                 break;
3025         default:
3026                 break;
3027         }
3028
3029         adev->gfx.rlc.funcs->start(adev);
3030
3031         return 0;
3032 }
3033
3034 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3035 {
3036         int i;
3037         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3038
3039         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3040         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3041         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3042         if (!enable) {
3043                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3044                         adev->gfx.gfx_ring[i].sched.ready = false;
3045         }
3046         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3047         udelay(50);
3048 }
3049
3050 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3051 {
3052         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3053         const struct gfx_firmware_header_v1_0 *ce_hdr;
3054         const struct gfx_firmware_header_v1_0 *me_hdr;
3055         const __le32 *fw_data;
3056         unsigned i, fw_size;
3057
3058         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3059                 return -EINVAL;
3060
3061         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3062                 adev->gfx.pfp_fw->data;
3063         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3064                 adev->gfx.ce_fw->data;
3065         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3066                 adev->gfx.me_fw->data;
3067
3068         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3069         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3070         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3071
3072         gfx_v9_0_cp_gfx_enable(adev, false);
3073
3074         /* PFP */
3075         fw_data = (const __le32 *)
3076                 (adev->gfx.pfp_fw->data +
3077                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3078         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3079         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3080         for (i = 0; i < fw_size; i++)
3081                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3082         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3083
3084         /* CE */
3085         fw_data = (const __le32 *)
3086                 (adev->gfx.ce_fw->data +
3087                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3088         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3089         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3090         for (i = 0; i < fw_size; i++)
3091                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3092         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3093
3094         /* ME */
3095         fw_data = (const __le32 *)
3096                 (adev->gfx.me_fw->data +
3097                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3098         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3099         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3100         for (i = 0; i < fw_size; i++)
3101                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3102         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3103
3104         return 0;
3105 }
3106
3107 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3108 {
3109         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3110         const struct cs_section_def *sect = NULL;
3111         const struct cs_extent_def *ext = NULL;
3112         int r, i, tmp;
3113
3114         /* init the CP */
3115         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3116         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3117
3118         gfx_v9_0_cp_gfx_enable(adev, true);
3119
3120         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3121         if (r) {
3122                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3123                 return r;
3124         }
3125
3126         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3127         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3128
3129         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3130         amdgpu_ring_write(ring, 0x80000000);
3131         amdgpu_ring_write(ring, 0x80000000);
3132
3133         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3134                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3135                         if (sect->id == SECT_CONTEXT) {
3136                                 amdgpu_ring_write(ring,
3137                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3138                                                ext->reg_count));
3139                                 amdgpu_ring_write(ring,
3140                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3141                                 for (i = 0; i < ext->reg_count; i++)
3142                                         amdgpu_ring_write(ring, ext->extent[i]);
3143                         }
3144                 }
3145         }
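             /*
              * The loop above replays the golden context-register state from
              * the gfx9 clear-state tables (gfx9_cs_data) as SET_CONTEXT_REG
              * packets; only SECT_CONTEXT sections are emitted here.
              */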
3146
3147         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3148         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3149
3150         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3151         amdgpu_ring_write(ring, 0);
3152
3153         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3154         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3155         amdgpu_ring_write(ring, 0x8000);
3156         amdgpu_ring_write(ring, 0x8000);
3157
3158         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3159         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3160                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3161         amdgpu_ring_write(ring, tmp);
3162         amdgpu_ring_write(ring, 0);
3163
3164         amdgpu_ring_commit(ring);
3165
3166         return 0;
3167 }
3168
3169 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3170 {
3171         struct amdgpu_ring *ring;
3172         u32 tmp;
3173         u32 rb_bufsz;
3174         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3175
3176         /* Set the write pointer delay */
3177         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3178
3179         /* set the RB to use vmid 0 */
3180         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3181
3182         /* Set ring buffer size */
3183         ring = &adev->gfx.gfx_ring[0];
3184         rb_bufsz = order_base_2(ring->ring_size / 8);
3185         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3186         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3187 #ifdef __BIG_ENDIAN
3188         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3189 #endif
3190         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3191
3192         /* Initialize the ring buffer's write pointers */
3193         ring->wptr = 0;
3194         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3195         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3196
3197         /* set the wb address whether it's enabled or not */
3198         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3199         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3200         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3201
3202         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3203         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3204         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3205
3206         mdelay(1);
3207         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3208
3209         rb_addr = ring->gpu_addr >> 8;
3210         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3211         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3212
3213         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3214         if (ring->use_doorbell) {
3215                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3216                                     DOORBELL_OFFSET, ring->doorbell_index);
3217                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3218                                     DOORBELL_EN, 1);
3219         } else {
3220                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3221         }
3222         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3223
3224         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3225                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3226         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3227
3228         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3229                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3230
3231
3232         /* start the ring */
3233         gfx_v9_0_cp_gfx_start(adev);
3234         ring->sched.ready = true;
3235
3236         return 0;
3237 }
3238
3239 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3240 {
3241         int i;
3242
3243         if (enable) {
3244                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3245         } else {
3246                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3247                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3248                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3249                         adev->gfx.compute_ring[i].sched.ready = false;
3250                 adev->gfx.kiq.ring.sched.ready = false;
3251         }
3252         udelay(50);
3253 }
3254
3255 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3256 {
3257         const struct gfx_firmware_header_v1_0 *mec_hdr;
3258         const __le32 *fw_data;
3259         unsigned i;
3260         u32 tmp;
3261
3262         if (!adev->gfx.mec_fw)
3263                 return -EINVAL;
3264
3265         gfx_v9_0_cp_compute_enable(adev, false);
3266
3267         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3268         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3269
3270         fw_data = (const __le32 *)
3271                 (adev->gfx.mec_fw->data +
3272                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3273         tmp = 0;
3274         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3275         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3276         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3277
3278         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3279                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3280         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3281                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3282
3283         /* MEC1 */
3284         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3285                          mec_hdr->jt_offset);
3286         for (i = 0; i < mec_hdr->jt_size; i++)
3287                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3288                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3289
3290         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3291                         adev->gfx.mec_fw_version);
3292         /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3293
3294         return 0;
3295 }
3296
3297 /* KIQ functions */
3298 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3299 {
3300         uint32_t tmp;
3301         struct amdgpu_device *adev = ring->adev;
3302
3303         /* tell the RLC which queue is the KIQ */
3304         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3305         tmp &= 0xffffff00;
3306         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3307         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3308         tmp |= 0x80;
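             /*
              * The low byte of RLC_CP_SCHEDULERS encodes the KIQ's
              * me/pipe/queue; the 0x80 bit set here and written in a second
              * pass presumably marks that encoding as valid for the RLC.
              */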
3309         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3310 }
3311
3312 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3313 {
3314         struct amdgpu_device *adev = ring->adev;
3315         struct v9_mqd *mqd = ring->mqd_ptr;
3316         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3317         uint32_t tmp;
3318
3319         mqd->header = 0xC0310800;
3320         mqd->compute_pipelinestat_enable = 0x00000001;
3321         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3322         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3323         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3324         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3325         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3326         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3327         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3328         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3329         mqd->compute_misc_reserved = 0x00000003;
3330
3331         mqd->dynamic_cu_mask_addr_lo =
3332                 lower_32_bits(ring->mqd_gpu_addr
3333                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3334         mqd->dynamic_cu_mask_addr_hi =
3335                 upper_32_bits(ring->mqd_gpu_addr
3336                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3337
3338         eop_base_addr = ring->eop_gpu_addr >> 8;
3339         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3340         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3341
3342         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3343         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3344         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3345                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
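             /*
              * Worked example: GFX9_MEC_HPD_SIZE is 4096 bytes = 1024 dwords,
              * so EOP_SIZE = order_base_2(1024) - 1 = 9 and 2^(9+1) = 1024
              * dwords, matching the allocated EOP buffer size.
              */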
3346
3347         mqd->cp_hqd_eop_control = tmp;
3348
3349         /* enable doorbell? */
3350         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3351
3352         if (ring->use_doorbell) {
3353                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3354                                     DOORBELL_OFFSET, ring->doorbell_index);
3355                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3356                                     DOORBELL_EN, 1);
3357                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3358                                     DOORBELL_SOURCE, 0);
3359                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3360                                     DOORBELL_HIT, 0);
3361         } else {
3362                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3363                                          DOORBELL_EN, 0);
3364         }
3365
3366         mqd->cp_hqd_pq_doorbell_control = tmp;
3367
3368         /* disable the queue if it's active */
3369         ring->wptr = 0;
3370         mqd->cp_hqd_dequeue_request = 0;
3371         mqd->cp_hqd_pq_rptr = 0;
3372         mqd->cp_hqd_pq_wptr_lo = 0;
3373         mqd->cp_hqd_pq_wptr_hi = 0;
3374
3375         /* set the pointer to the MQD */
3376         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3377         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3378
3379         /* set MQD vmid to 0 */
3380         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3381         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3382         mqd->cp_mqd_control = tmp;
3383
3384         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3385         hqd_gpu_addr = ring->gpu_addr >> 8;
3386         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3387         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3388
3389         /* set up the HQD, this is similar to CP_RB0_CNTL */
3390         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3391         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3392                             (order_base_2(ring->ring_size / 4) - 1));
3393         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3394                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3395 #ifdef __BIG_ENDIAN
3396         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3397 #endif
3398         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3399         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3400         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3401         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3402         mqd->cp_hqd_pq_control = tmp;
3403
3404         /* set the wb address whether it's enabled or not */
3405         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3406         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3407         mqd->cp_hqd_pq_rptr_report_addr_hi =
3408                 upper_32_bits(wb_gpu_addr) & 0xffff;
3409
3410         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3411         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3412         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3413         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3414
3415         tmp = 0;
3416         /* enable the doorbell if requested */
3417         if (ring->use_doorbell) {
3418                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3419                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3420                                 DOORBELL_OFFSET, ring->doorbell_index);
3421
3422                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3423                                          DOORBELL_EN, 1);
3424                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3425                                          DOORBELL_SOURCE, 0);
3426                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3427                                          DOORBELL_HIT, 0);
3428         }
3429
3430         mqd->cp_hqd_pq_doorbell_control = tmp;
3431
3432         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3433         ring->wptr = 0;
3434         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3435
3436         /* set the vmid for the queue */
3437         mqd->cp_hqd_vmid = 0;
3438
3439         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3440         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3441         mqd->cp_hqd_persistent_state = tmp;
3442
3443         /* set MIN_IB_AVAIL_SIZE */
3444         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3445         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3446         mqd->cp_hqd_ib_control = tmp;
3447
3448         /* the map_queues packet doesn't need to activate the queue,
3449          * so only the KIQ needs to set this field.
3450          */
3451         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3452                 mqd->cp_hqd_active = 1;
3453
3454         return 0;
3455 }
3456
3457 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3458 {
3459         struct amdgpu_device *adev = ring->adev;
3460         struct v9_mqd *mqd = ring->mqd_ptr;
3461         int j;
3462
3463         /* disable wptr polling */
3464         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3465
3466         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3467                mqd->cp_hqd_eop_base_addr_lo);
3468         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3469                mqd->cp_hqd_eop_base_addr_hi);
3470
3471         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3472         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3473                mqd->cp_hqd_eop_control);
3474
3475         /* enable doorbell? */
3476         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3477                mqd->cp_hqd_pq_doorbell_control);
3478
3479         /* disable the queue if it's active */
3480         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3481                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3482                 for (j = 0; j < adev->usec_timeout; j++) {
3483                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3484                                 break;
3485                         udelay(1);
3486                 }
3487                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3488                        mqd->cp_hqd_dequeue_request);
3489                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3490                        mqd->cp_hqd_pq_rptr);
3491                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3492                        mqd->cp_hqd_pq_wptr_lo);
3493                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3494                        mqd->cp_hqd_pq_wptr_hi);
3495         }
3496
3497         /* set the pointer to the MQD */
3498         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3499                mqd->cp_mqd_base_addr_lo);
3500         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3501                mqd->cp_mqd_base_addr_hi);
3502
3503         /* set MQD vmid to 0 */
3504         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3505                mqd->cp_mqd_control);
3506
3507         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3508         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3509                mqd->cp_hqd_pq_base_lo);
3510         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3511                mqd->cp_hqd_pq_base_hi);
3512
3513         /* set up the HQD, this is similar to CP_RB0_CNTL */
3514         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3515                mqd->cp_hqd_pq_control);
3516
3517         /* set the wb address whether it's enabled or not */
3518         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3519                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3520         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3521                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3522
3523         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3524         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3525                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3526         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3527                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3528
3529         /* enable the doorbell if requested */
3530         if (ring->use_doorbell) {
3531                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3532                                         (adev->doorbell_index.kiq * 2) << 2);
3533                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3534                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3535         }
3536
3537         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3538                mqd->cp_hqd_pq_doorbell_control);
3539
3540         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3541         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3542                mqd->cp_hqd_pq_wptr_lo);
3543         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3544                mqd->cp_hqd_pq_wptr_hi);
3545
3546         /* set the vmid for the queue */
3547         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3548
3549         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3550                mqd->cp_hqd_persistent_state);
3551
3552         /* activate the queue */
3553         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3554                mqd->cp_hqd_active);
3555
3556         if (ring->use_doorbell)
3557                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3558
3559         return 0;
3560 }
3561
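/*
 * Tear down the KIQ's HQD: request a dequeue, wait up to the usec timeout for
 * CP_HQD_ACTIVE to clear, force the queue inactive if that times out, then
 * clear the remaining HQD registers. As with init, the caller selects the
 * queue via soc15_grbm_select() under srbm_mutex.
 */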
3562 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3563 {
3564         struct amdgpu_device *adev = ring->adev;
3565         int j;
3566
3567         /* disable the queue if it's active */
3568         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3569
3570                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3571
3572                 for (j = 0; j < adev->usec_timeout; j++) {
3573                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3574                                 break;
3575                         udelay(1);
3576                 }
3577
3578                 if (j == adev->usec_timeout) {
3579                         DRM_DEBUG("KIQ dequeue request failed.\n");
3580
3581                         /* Manual disable if dequeue request times out */
3582                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3583                 }
3584
3585                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3586                       0);
3587         }
3588
3589         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3590         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3591         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3592         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3593         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3594         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3595         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3596         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3597
3598         return 0;
3599 }
3600
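/*
 * Initialize the KIQ's MQD and HQD. On a GPU reset the MQD is restored from
 * the backup copy and only the HQD registers are reprogrammed; on a fresh
 * init the MQD is built via gfx_v9_0_mqd_init() and then backed up so later
 * resets can reuse it.
 */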
3601 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3602 {
3603         struct amdgpu_device *adev = ring->adev;
3604         struct v9_mqd *mqd = ring->mqd_ptr;
3605         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3606
3607         gfx_v9_0_kiq_setting(ring);
3608
3609         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3610                 /* reset MQD to a clean status */
3611                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3612                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3613
3614                 /* reset ring buffer */
3615                 ring->wptr = 0;
3616                 amdgpu_ring_clear_ring(ring);
3617
3618                 mutex_lock(&adev->srbm_mutex);
3619                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3620                 gfx_v9_0_kiq_init_register(ring);
3621                 soc15_grbm_select(adev, 0, 0, 0, 0);
3622                 mutex_unlock(&adev->srbm_mutex);
3623         } else {
3624                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3625                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3626                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3627                 mutex_lock(&adev->srbm_mutex);
3628                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3629                 gfx_v9_0_mqd_init(ring);
3630                 gfx_v9_0_kiq_init_register(ring);
3631                 soc15_grbm_select(adev, 0, 0, 0, 0);
3632                 mutex_unlock(&adev->srbm_mutex);
3633
3634                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3635                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3636         }
3637
3638         return 0;
3639 }
3640
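/*
 * Initialize a compute queue (KCQ) MQD. Unlike the KIQ, the HQD registers
 * are not written here; the KCQs are mapped later through the KIQ (see
 * amdgpu_gfx_enable_kcq() in gfx_v9_0_kcq_resume() below).
 */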
3641 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3642 {
3643         struct amdgpu_device *adev = ring->adev;
3644         struct v9_mqd *mqd = ring->mqd_ptr;
3645         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3646
3647         if (!adev->in_gpu_reset && !adev->in_suspend) {
3648                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3649                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3650                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3651                 mutex_lock(&adev->srbm_mutex);
3652                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3653                 gfx_v9_0_mqd_init(ring);
3654                 soc15_grbm_select(adev, 0, 0, 0, 0);
3655                 mutex_unlock(&adev->srbm_mutex);
3656
3657                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3658                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3659         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3660                 /* reset MQD to a clean status */
3661                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3662                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3663
3664                 /* reset ring buffer */
3665                 ring->wptr = 0;
3666                 amdgpu_ring_clear_ring(ring);
3667         } else {
3668                 amdgpu_ring_clear_ring(ring);
3669         }
3670
3671         return 0;
3672 }
3673
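/* Map the KIQ MQD BO, initialize the KIQ queue and mark its ring ready. */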
3674 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3675 {
3676         struct amdgpu_ring *ring;
3677         int r;
3678
3679         ring = &adev->gfx.kiq.ring;
3680
3681         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3682         if (unlikely(r != 0))
3683                 return r;
3684
3685         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3686         if (unlikely(r != 0)) {
                     amdgpu_bo_unreserve(ring->mqd_obj);
3687                 return r;
             }
3688
3689         gfx_v9_0_kiq_init_queue(ring);
3690         amdgpu_bo_kunmap(ring->mqd_obj);
3691         ring->mqd_ptr = NULL;
3692         amdgpu_bo_unreserve(ring->mqd_obj);
3693         ring->sched.ready = true;
3694         return 0;
3695 }
3696
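/*
 * Bring up the compute queues: enable the MEC, initialize each KCQ's MQD,
 * then ask the KIQ to map all of them via amdgpu_gfx_enable_kcq().
 */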
3697 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3698 {
3699         struct amdgpu_ring *ring = NULL;
3700         int r = 0, i;
3701
3702         gfx_v9_0_cp_compute_enable(adev, true);
3703
3704         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3705                 ring = &adev->gfx.compute_ring[i];
3706
3707                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3708                 if (unlikely(r != 0))
3709                         goto done;
3710                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3711                 if (!r) {
3712                         r = gfx_v9_0_kcq_init_queue(ring);
3713                         amdgpu_bo_kunmap(ring->mqd_obj);
3714                         ring->mqd_ptr = NULL;
3715                 }
3716                 amdgpu_bo_unreserve(ring->mqd_obj);
3717                 if (r)
3718                         goto done;
3719         }
3720
3721         r = amdgpu_gfx_enable_kcq(adev);
3722 done:
3723         return r;
3724 }
3725
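/*
 * CP bring-up order: load the CP microcode when the PSP isn't doing the
 * loading, resume the KIQ first, then the GFX ring (skipped on Arcturus,
 * which has no gfx rings), then the compute queues, and finally ring-test
 * everything before re-enabling the GUI idle interrupt.
 */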
3726 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3727 {
3728         int r, i;
3729         struct amdgpu_ring *ring;
3730
3731         if (!(adev->flags & AMD_IS_APU))
3732                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3733
3734         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3735                 if (adev->asic_type != CHIP_ARCTURUS) {
3736                         /* legacy firmware loading */
3737                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3738                         if (r)
3739                                 return r;
3740                 }
3741
3742                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3743                 if (r)
3744                         return r;
3745         }
3746
3747         r = gfx_v9_0_kiq_resume(adev);
3748         if (r)
3749                 return r;
3750
3751         if (adev->asic_type != CHIP_ARCTURUS) {
3752                 r = gfx_v9_0_cp_gfx_resume(adev);
3753                 if (r)
3754                         return r;
3755         }
3756
3757         r = gfx_v9_0_kcq_resume(adev);
3758         if (r)
3759                 return r;
3760
3761         if (adev->asic_type != CHIP_ARCTURUS) {
3762                 ring = &adev->gfx.gfx_ring[0];
3763                 r = amdgpu_ring_test_helper(ring);
3764                 if (r)
3765                         return r;
3766         }
3767
3768         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3769                 ring = &adev->gfx.compute_ring[i];
3770                 amdgpu_ring_test_helper(ring);
3771         }
3772
3773         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3774
3775         return 0;
3776 }
3777
3778 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3779 {
3780         u32 tmp;
3781
3782         if (adev->asic_type != CHIP_ARCTURUS)
3783                 return;
3784
3785         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3786         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3787                                 adev->df.hash_status.hash_64k);
3788         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3789                                 adev->df.hash_status.hash_2m);
3790         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3791                                 adev->df.hash_status.hash_1g);
3792         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3793 }
3794
3795 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3796 {
3797         if (adev->asic_type != CHIP_ARCTURUS)
3798                 gfx_v9_0_cp_gfx_enable(adev, enable);
3799         gfx_v9_0_cp_compute_enable(adev, enable);
3800 }
3801
3802 static int gfx_v9_0_hw_init(void *handle)
3803 {
3804         int r;
3805         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3806
3807         if (!amdgpu_sriov_vf(adev))
3808                 gfx_v9_0_init_golden_registers(adev);
3809
3810         gfx_v9_0_constants_init(adev);
3811
3812         gfx_v9_0_init_tcp_config(adev);
3813
3814         r = adev->gfx.rlc.funcs->resume(adev);
3815         if (r)
3816                 return r;
3817
3818         r = gfx_v9_0_cp_resume(adev);
3819         if (r)
3820                 return r;
3821
3822         return r;
3823 }
3824
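/*
 * hw_fini: release the GFX interrupts, unmap the KCQs (skipped once a RAS
 * fatal error has triggered), take the SR-IOV early exit, tear down the KIQ
 * registers when the device is really being unbound, and finally stop the
 * CP and the RLC.
 */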
3825 static int gfx_v9_0_hw_fini(void *handle)
3826 {
3827         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3828
3829         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3830         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3831         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3832
3833         /* DF freeze and KCQ disable will fail once a RAS fatal error has been triggered */
3834         if (!amdgpu_ras_intr_triggered())
3835                 /* disable the KCQs so the CPC stops touching memory that is no longer valid */
3836                 amdgpu_gfx_disable_kcq(adev);
3837
3838         if (amdgpu_sriov_vf(adev)) {
3839                 gfx_v9_0_cp_gfx_enable(adev, false);
3840                 /* must disable polling for SRIOV once hw teardown is finished, otherwise
3841                  * the CPC engine may keep fetching a WB address that is no longer
3842                  * valid after sw teardown and trigger a DMAR read error on the
3843                  * hypervisor side.
3844                  */
3845                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3846                 return 0;
3847         }
3848
3849         /* Use the deinitialization sequence from CAIL when unbinding the device
3850          * from the driver, otherwise the KIQ hangs when binding it back.
3851          */
3852         if (!adev->in_gpu_reset && !adev->in_suspend) {
3853                 mutex_lock(&adev->srbm_mutex);
3854                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3855                                 adev->gfx.kiq.ring.pipe,
3856                                 adev->gfx.kiq.ring.queue, 0);
3857                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3858                 soc15_grbm_select(adev, 0, 0, 0, 0);
3859                 mutex_unlock(&adev->srbm_mutex);
3860         }
3861
3862         gfx_v9_0_cp_enable(adev, false);
3863         adev->gfx.rlc.funcs->stop(adev);
3864
3865         return 0;
3866 }
3867
3868 static int gfx_v9_0_suspend(void *handle)
3869 {
3870         return gfx_v9_0_hw_fini(handle);
3871 }
3872
3873 static int gfx_v9_0_resume(void *handle)
3874 {
3875         return gfx_v9_0_hw_init(handle);
3876 }
3877
3878 static bool gfx_v9_0_is_idle(void *handle)
3879 {
3880         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3881
3882         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3883                                 GRBM_STATUS, GUI_ACTIVE))
3884                 return false;
3885         else
3886                 return true;
3887 }
3888
3889 static int gfx_v9_0_wait_for_idle(void *handle)
3890 {
3891         unsigned i;
3892         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3893
3894         for (i = 0; i < adev->usec_timeout; i++) {
3895                 if (gfx_v9_0_is_idle(handle))
3896                         return 0;
3897                 udelay(1);
3898         }
3899         return -ETIMEDOUT;
3900 }
3901
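/*
 * Soft reset: inspect GRBM_STATUS/GRBM_STATUS2 to decide which blocks (CP,
 * GFX, RLC) appear hung, stop the RLC and the CP front ends, then pulse the
 * corresponding bits in GRBM_SOFT_RESET.
 */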
3902 static int gfx_v9_0_soft_reset(void *handle)
3903 {
3904         u32 grbm_soft_reset = 0;
3905         u32 tmp;
3906         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3907
3908         /* GRBM_STATUS */
3909         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3910         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3911                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3912                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3913                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3914                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3915                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3916                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3917                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3918                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3919                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3920         }
3921
3922         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3923                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3924                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3925         }
3926
3927         /* GRBM_STATUS2 */
3928         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3929         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3930                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3931                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3932
3933
3934         if (grbm_soft_reset) {
3935                 /* stop the rlc */
3936                 adev->gfx.rlc.funcs->stop(adev);
3937
3938                 if (adev->asic_type != CHIP_ARCTURUS)
3939                         /* Disable GFX parsing/prefetching */
3940                         gfx_v9_0_cp_gfx_enable(adev, false);
3941
3942                 /* Disable MEC parsing/prefetching */
3943                 gfx_v9_0_cp_compute_enable(adev, false);
3944
3945                 if (grbm_soft_reset) {
3946                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3947                         tmp |= grbm_soft_reset;
3948                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3949                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3950                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3951
3952                         udelay(50);
3953
3954                         tmp &= ~grbm_soft_reset;
3955                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3956                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3957                 }
3958
3959                 /* Wait a little for things to settle down */
3960                 udelay(50);
3961         }
3962         return 0;
3963 }
3964
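/*
 * Read the 64-bit RLC GPU clock counter with GFXOFF temporarily disabled.
 * On Vega10 under SR-IOV the REFCLOCK timestamp pair is sampled instead of
 * the capture register, re-reading the MSB until it is stable across the
 * LSB read so the two halves are consistent.
 */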
3965 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3966 {
3967         uint64_t clock;
3968
3969         amdgpu_gfx_off_ctrl(adev, false);
3970         mutex_lock(&adev->gfx.gpu_clock_mutex);
3971         if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
3972                 uint32_t tmp, lsb, msb, i = 0;
3973                 do {
3974                         if (i != 0)
3975                                 udelay(1);
3976                         tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3977                         lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
3978                         msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3979                         i++;
3980                 } while (unlikely(tmp != msb) && (i < adev->usec_timeout));
3981                 clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
3982         } else {
3983                 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3984                 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3985                         ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3986         }
3987         mutex_unlock(&adev->gfx.gpu_clock_mutex);
3988         amdgpu_gfx_off_ctrl(adev, true);
3989         return clock;
3990 }
3991
3992 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3993                                           uint32_t vmid,
3994                                           uint32_t gds_base, uint32_t gds_size,
3995                                           uint32_t gws_base, uint32_t gws_size,
3996                                           uint32_t oa_base, uint32_t oa_size)
3997 {
3998         struct amdgpu_device *adev = ring->adev;
3999
4000         /* GDS Base */
4001         gfx_v9_0_write_data_to_reg(ring, 0, false,
4002                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4003                                    gds_base);
4004
4005         /* GDS Size */
4006         gfx_v9_0_write_data_to_reg(ring, 0, false,
4007                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4008                                    gds_size);
4009
4010         /* GWS */
4011         gfx_v9_0_write_data_to_reg(ring, 0, false,
4012                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4013                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4014
4015         /* OA */
4016         gfx_v9_0_write_data_to_reg(ring, 0, false,
4017                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4018                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4019 }
4020
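/*
 * Pre-assembled GFX9 compute shader binaries used by the EDC GPR workaround
 * below; one kernel walks the VGPRs and the other the SGPRs, apparently so
 * the GPR ECC state starts out initialized.
 */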
4021 static const u32 vgpr_init_compute_shader[] =
4022 {
4023         0xb07c0000, 0xbe8000ff,
4024         0x000000f8, 0xbf110800,
4025         0x7e000280, 0x7e020280,
4026         0x7e040280, 0x7e060280,
4027         0x7e080280, 0x7e0a0280,
4028         0x7e0c0280, 0x7e0e0280,
4029         0x80808800, 0xbe803200,
4030         0xbf84fff5, 0xbf9c0000,
4031         0xd28c0001, 0x0001007f,
4032         0xd28d0001, 0x0002027e,
4033         0x10020288, 0xb8810904,
4034         0xb7814000, 0xd1196a01,
4035         0x00000301, 0xbe800087,
4036         0xbefc00c1, 0xd89c4000,
4037         0x00020201, 0xd89cc080,
4038         0x00040401, 0x320202ff,
4039         0x00000800, 0x80808100,
4040         0xbf84fff8, 0x7e020280,
4041         0xbf810000, 0x00000000,
4042 };
4043
4044 static const u32 sgpr_init_compute_shader[] =
4045 {
4046         0xb07c0000, 0xbe8000ff,
4047         0x0000005f, 0xbee50080,
4048         0xbe812c65, 0xbe822c65,
4049         0xbe832c65, 0xbe842c65,
4050         0xbe852c65, 0xb77c0005,
4051         0x80808500, 0xbf84fff8,
4052         0xbe800080, 0xbf810000,
4053 };
4054
4055 /* When the register arrays below are changed, please update gpr_reg_size
4056    and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
4057    so that all gfx9 ASICs are covered. */
4058 static const struct soc15_reg_entry vgpr_init_regs[] = {
4059    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4060    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4061    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4062    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4063    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4064    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4065    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4066    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4067    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4068    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4069    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4070    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4071    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4072    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4073 };
4074
4075 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4076    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4077    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4078    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4079    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4080    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4081    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4082    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4083    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4084    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4085    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4086    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4087    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4088    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4089    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4090 };
4091
4092 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4093    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4094    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4095    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4096    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4097    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4098    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4099    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4100    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4101    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4102    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4103    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4104    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4105    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4106    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4107 };
4108
4109 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4110    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4111    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4112    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4113    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4114    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4115    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4116    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4117    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4118    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4119    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4120    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4121    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4122    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4123    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4124    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4125    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4126    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4127    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4128    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4129    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4130    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4131    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4132    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4133    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4134    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4135    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4136    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4137    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4138    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4139    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4140    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4141    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4142    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4143    { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1},
4144 };
4145
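/*
 * GDS EDC workaround: when GFX RAS is supported, issue a CP DMA_DATA packet
 * that writes across the whole GDS aperture and busy-wait for the ring to
 * drain, presumably so the GDS ECC state starts out initialized.
 */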
4146 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4147 {
4148         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4149         int i, r;
4150
4151         /* only support when RAS is enabled */
4152         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4153                 return 0;
4154
4155         r = amdgpu_ring_alloc(ring, 7);
4156         if (r) {
4157                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4158                         ring->name, r);
4159                 return r;
4160         }
4161
4162         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4163         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4164
4165         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4166         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4167                                 PACKET3_DMA_DATA_DST_SEL(1) |
4168                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4169                                 PACKET3_DMA_DATA_ENGINE(0)));
4170         amdgpu_ring_write(ring, 0);
4171         amdgpu_ring_write(ring, 0);
4172         amdgpu_ring_write(ring, 0);
4173         amdgpu_ring_write(ring, 0);
4174         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4175                                 adev->gds.gds_size);
4176
4177         amdgpu_ring_commit(ring);
4178
4179         for (i = 0; i < adev->usec_timeout; i++) {
4180                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4181                         break;
4182                 udelay(1);
4183         }
4184
4185         if (i >= adev->usec_timeout)
4186                 r = -ETIMEDOUT;
4187
4188         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4189
4190         return r;
4191 }
4192
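/*
 * GPR EDC workaround: build a single IB that dispatches the VGPR shader once
 * and the SGPR shader twice (the two SGPR passes target different halves of
 * the CU mask via COMPUTE_STATIC_THREAD_MGMT), wait for the fence, then
 * clear the EDC counters on Vega20/Arcturus.
 */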
4193 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4194 {
4195         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4196         struct amdgpu_ib ib;
4197         struct dma_fence *f = NULL;
4198         int r, i;
4199         unsigned total_size, vgpr_offset, sgpr_offset;
4200         u64 gpu_addr;
4201
4202         int compute_dim_x = adev->gfx.config.max_shader_engines *
4203                                                 adev->gfx.config.max_cu_per_sh *
4204                                                 adev->gfx.config.max_sh_per_se;
4205         int sgpr_work_group_size = 5;
4206         int gpr_reg_size = compute_dim_x / 16 + 6;
4207
4208         /* only support when RAS is enabled */
4209         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4210                 return 0;
4211
4212         /* bail if the compute ring is not ready */
4213         if (!ring->sched.ready)
4214                 return 0;
4215
4216         total_size =
4217                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4218         total_size +=
4219                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4220         total_size +=
4221                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4222         total_size = ALIGN(total_size, 256);
4223         vgpr_offset = total_size;
4224         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4225         sgpr_offset = total_size;
4226         total_size += sizeof(sgpr_init_compute_shader);
4227
4228         /* allocate an indirect buffer to put the commands in */
4229         memset(&ib, 0, sizeof(ib));
4230         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4231         if (r) {
4232                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4233                 return r;
4234         }
4235
4236         /* load the compute shaders */
4237         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4238                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4239
4240         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4241                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4242
4243         /* init the ib length to 0 */
4244         ib.length_dw = 0;
4245
4246         /* VGPR */
4247         /* write the register state for the compute dispatch */
4248         for (i = 0; i < gpr_reg_size; i++) {
4249                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4250                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4251                                                                 - PACKET3_SET_SH_REG_START;
4252                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4253         }
4254         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4255         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4256         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4257         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4258                                                         - PACKET3_SET_SH_REG_START;
4259         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4260         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4261
4262         /* write dispatch packet */
4263         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4264         ib.ptr[ib.length_dw++] = compute_dim_x; /* x */
4265         ib.ptr[ib.length_dw++] = 1; /* y */
4266         ib.ptr[ib.length_dw++] = 1; /* z */
4267         ib.ptr[ib.length_dw++] =
4268                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4269
4270         /* write CS partial flush packet */
4271         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4272         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4273
4274         /* SGPR1 */
4275         /* write the register state for the compute dispatch */
4276         for (i = 0; i < gpr_reg_size; i++) {
4277                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4278                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4279                                                                 - PACKET3_SET_SH_REG_START;
4280                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4281         }
4282         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4283         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4284         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4285         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4286                                                         - PACKET3_SET_SH_REG_START;
4287         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4288         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4289
4290         /* write dispatch packet */
4291         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4292         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4293         ib.ptr[ib.length_dw++] = 1; /* y */
4294         ib.ptr[ib.length_dw++] = 1; /* z */
4295         ib.ptr[ib.length_dw++] =
4296                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4297
4298         /* write CS partial flush packet */
4299         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4300         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4301
4302         /* SGPR2 */
4303         /* write the register state for the compute dispatch */
4304         for (i = 0; i < gpr_reg_size; i++) {
4305                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4306                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4307                                                                 - PACKET3_SET_SH_REG_START;
4308                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4309         }
4310         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4311         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4312         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4313         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4314                                                         - PACKET3_SET_SH_REG_START;
4315         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4316         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4317
4318         /* write dispatch packet */
4319         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4320         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4321         ib.ptr[ib.length_dw++] = 1; /* y */
4322         ib.ptr[ib.length_dw++] = 1; /* z */
4323         ib.ptr[ib.length_dw++] =
4324                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4325
4326         /* write CS partial flush packet */
4327         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4328         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4329
4330         /* schedule the IB on the ring */
4331         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4332         if (r) {
4333                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4334                 goto fail;
4335         }
4336
4337         /* wait for the GPU to finish processing the IB */
4338         r = dma_fence_wait(f, false);
4339         if (r) {
4340                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4341                 goto fail;
4342         }
4343
4344         switch (adev->asic_type) {
4346         case CHIP_VEGA20:
4347                 gfx_v9_0_clear_ras_edc_counter(adev);
4348                 break;
4349         case CHIP_ARCTURUS:
4350                 gfx_v9_4_clear_ras_edc_counter(adev);
4351                 break;
4352         default:
4353                 break;
4354         }
4355
4356 fail:
4357         amdgpu_ib_free(adev, &ib, NULL);
4358         dma_fence_put(f);
4359
4360         return r;
4361 }
4362
4363 static int gfx_v9_0_early_init(void *handle)
4364 {
4365         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4366
4367         if (adev->asic_type == CHIP_ARCTURUS)
4368                 adev->gfx.num_gfx_rings = 0;
4369         else
4370                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4371         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4372         gfx_v9_0_set_kiq_pm4_funcs(adev);
4373         gfx_v9_0_set_ring_funcs(adev);
4374         gfx_v9_0_set_irq_funcs(adev);
4375         gfx_v9_0_set_gds_init(adev);
4376         gfx_v9_0_set_rlc_funcs(adev);
4377
4378         return 0;
4379 }
4380
4381 static int gfx_v9_0_ecc_late_init(void *handle)
4382 {
4383         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4384         int r;
4385
4386         /*
4387          * Temporary workaround: on several cards the CP firmware fails to
4388          * update the read pointer while CPDMA writes the clearing operation
4389          * to GDS during the suspend/resume sequence, so limit this operation
4390          * to the cold-boot path.
4391          */
4392         if (!adev->in_suspend) {
4393                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4394                 if (r)
4395                         return r;
4396         }
4397
4398         /* requires IBs so do in late init after IB pool is initialized */
4399         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4400         if (r)
4401                 return r;
4402
4403         r = amdgpu_gfx_ras_late_init(adev);
4404         if (r)
4405                 return r;
4406
4407         return 0;
4408 }
4409
4410 static int gfx_v9_0_late_init(void *handle)
4411 {
4412         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4413         int r;
4414
4415         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4416         if (r)
4417                 return r;
4418
4419         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4420         if (r)
4421                 return r;
4422
4423         r = gfx_v9_0_ecc_late_init(handle);
4424         if (r)
4425                 return r;
4426
4427         return 0;
4428 }
4429
4430 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4431 {
4432         uint32_t rlc_setting;
4433
4434         /* if RLC is not enabled, do nothing */
4435         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4436         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4437                 return false;
4438
4439         return true;
4440 }
4441
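/*
 * Ask the RLC to enter safe mode by writing the CMD/MESSAGE handshake into
 * RLC_SAFE_MODE and polling until the CMD bit is acknowledged (cleared).
 */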
4442 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4443 {
4444         uint32_t data;
4445         unsigned i;
4446
4447         data = RLC_SAFE_MODE__CMD_MASK;
4448         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4449         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4450
4451         /* wait for RLC_SAFE_MODE */
4452         for (i = 0; i < adev->usec_timeout; i++) {
4453                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4454                         break;
4455                 udelay(1);
4456         }
4457 }
4458
4459 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4460 {
4461         uint32_t data;
4462
4463         data = RLC_SAFE_MODE__CMD_MASK;
4464         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4465 }
4466
4467 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4468                                                 bool enable)
4469 {
4470         amdgpu_gfx_rlc_enter_safe_mode(adev);
4471
4472         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4473                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4474                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4475                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4476         } else {
4477                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4478                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4479                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4480         }
4481
4482         amdgpu_gfx_rlc_exit_safe_mode(adev);
4483 }
4484
4485 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4486                                                 bool enable)
4487 {
4488         /* TODO: double check if we need to perform under safe mode */
4489         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4490
4491         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4492                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4493         else
4494                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4495
4496         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4497                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4498         else
4499                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4500
4501         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4502 }
4503
4504 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4505                                                       bool enable)
4506 {
4507         uint32_t data, def;
4508
4509         amdgpu_gfx_rlc_enter_safe_mode(adev);
4510
4511         /* It is disabled by HW by default */
4512         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4513                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4514                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4515
4516                 if (adev->asic_type != CHIP_VEGA12)
4517                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4518
4519                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4520                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4521                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4522
4523                 /* only for Vega10 & Raven1 */
4524                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4525
4526                 if (def != data)
4527                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4528
4529                 /* MGLS is a global flag to control all MGLS in GFX */
4530                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4531                         /* 2 - RLC memory Light sleep */
4532                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4533                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4534                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4535                                 if (def != data)
4536                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4537                         }
4538                         /* 3 - CP memory Light sleep */
4539                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4540                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4541                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4542                                 if (def != data)
4543                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4544                         }
4545                 }
4546         } else {
4547                 /* 1 - MGCG_OVERRIDE */
4548                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4549
4550                 if (adev->asic_type != CHIP_VEGA12)
4551                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4552
4553                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4554                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4555                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4556                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4557
4558                 if (def != data)
4559                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4560
4561                 /* 2 - disable MGLS in RLC */
4562                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4563                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4564                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4565                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4566                 }
4567
4568                 /* 3 - disable MGLS in CP */
4569                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4570                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4571                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4572                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4573                 }
4574         }
4575
4576         amdgpu_gfx_rlc_exit_safe_mode(adev);
4577 }
4578
4579 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4580                                            bool enable)
4581 {
4582         uint32_t data, def;
4583
4584         if (adev->asic_type == CHIP_ARCTURUS)
4585                 return;
4586
4587         amdgpu_gfx_rlc_enter_safe_mode(adev);
4588
4589         /* Enable 3D CGCG/CGLS */
4590         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4591                 /* write cmd to clear cgcg/cgls ov */
4592                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4593                 /* unset CGCG override */
4594                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4595                 /* update CGCG and CGLS override bits */
4596                 if (def != data)
4597                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4598
4599                 /* enable 3Dcgcg FSM(0x0000363f) */
4600                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4601
4602                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4603                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4604                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4605                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4606                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4607                 if (def != data)
4608                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4609
4610                 /* set IDLE_POLL_COUNT(0x00900100) */
4611                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4612                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4613                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4614                 if (def != data)
4615                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4616         } else {
4617                 /* Disable CGCG/CGLS */
4618                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4619                 /* disable cgcg, cgls should be disabled */
4620                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4621                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4622                 /* disable cgcg and cgls in FSM */
4623                 if (def != data)
4624                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4625         }
4626
4627         amdgpu_gfx_rlc_exit_safe_mode(adev);
4628 }
4629
4630 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4631                                                       bool enable)
4632 {
4633         uint32_t def, data;
4634
4635         amdgpu_gfx_rlc_enter_safe_mode(adev);
4636
4637         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4638                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4639                 /* unset CGCG override */
4640                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4641                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4642                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4643                 else
4644                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4645                 /* update CGCG and CGLS override bits */
4646                 if (def != data)
4647                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4648
4649                 /* enable cgcg FSM(0x0000363F) */
4650                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4651
4652                 if (adev->asic_type == CHIP_ARCTURUS)
4653                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4654                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4655                 else
4656                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4657                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4658                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4659                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4660                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4661                 if (def != data)
4662                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4663
4664                 /* set IDLE_POLL_COUNT(0x00900100) */
4665                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4666                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4667                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4668                 if (def != data)
4669                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4670         } else {
4671                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4672                 /* reset CGCG/CGLS bits */
4673                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4674                 /* disable cgcg and cgls in FSM */
4675                 if (def != data)
4676                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4677         }
4678
4679         amdgpu_gfx_rlc_exit_safe_mode(adev);
4680 }
4681
4682 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4683                                             bool enable)
4684 {
4685         if (enable) {
4686                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4687                  * ===  MGCG + MGLS ===
4688                  */
4689                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4690                 /* ===  CGCG /CGLS for GFX 3D Only === */
4691                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4692                 /* ===  CGCG + CGLS === */
4693                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4694         } else {
4695                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4696                  * ===  CGCG + CGLS ===
4697                  */
4698                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4699                 /* ===  CGCG /CGLS for GFX 3D Only === */
4700                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4701                 /* ===  MGCG + MGLS === */
4702                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4703         }
4704         return 0;
4705 }
4706
4707 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4708         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4709         .set_safe_mode = gfx_v9_0_set_safe_mode,
4710         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4711         .init = gfx_v9_0_rlc_init,
4712         .get_csb_size = gfx_v9_0_get_csb_size,
4713         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4714         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4715         .resume = gfx_v9_0_rlc_resume,
4716         .stop = gfx_v9_0_rlc_stop,
4717         .reset = gfx_v9_0_rlc_reset,
4718         .start = gfx_v9_0_rlc_start
4719 };
4720
4721 static int gfx_v9_0_set_powergating_state(void *handle,
4722                                           enum amd_powergating_state state)
4723 {
4724         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4725         bool enable = (state == AMD_PG_STATE_GATE);
4726
4727         switch (adev->asic_type) {
4728         case CHIP_RAVEN:
4729         case CHIP_RENOIR:
4730                 if (!enable) {
4731                         amdgpu_gfx_off_ctrl(adev, false);
4732                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4733                 }
4734                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4735                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4736                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4737                 } else {
4738                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4739                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4740                 }
4741
4742                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4743                         gfx_v9_0_enable_cp_power_gating(adev, true);
4744                 else
4745                         gfx_v9_0_enable_cp_power_gating(adev, false);
4746
4747                 /* update gfx cgpg state */
4748                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4749
4750                 /* update mgcg state */
4751                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4752
4753                 if (enable)
4754                         amdgpu_gfx_off_ctrl(adev, true);
4755                 break;
4756         case CHIP_VEGA12:
4757                 if (!enable) {
4758                         amdgpu_gfx_off_ctrl(adev, false);
4759                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4760                 } else {
4761                         amdgpu_gfx_off_ctrl(adev, true);
4762                 }
4763                 break;
4764         default:
4765                 break;
4766         }
4767
4768         return 0;
4769 }
4770
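/*
 * Clockgating callback for the GFX IP block.  Not applicable to SR-IOV VFs
 * (clock gating is left to the host); otherwise the requested state is
 * forwarded to gfx_v9_0_update_gfx_clock_gating() for the supported ASICs.
 */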
4771 static int gfx_v9_0_set_clockgating_state(void *handle,
4772                                           enum amd_clockgating_state state)
4773 {
4774         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4775
4776         if (amdgpu_sriov_vf(adev))
4777                 return 0;
4778
4779         switch (adev->asic_type) {
4780         case CHIP_VEGA10:
4781         case CHIP_VEGA12:
4782         case CHIP_VEGA20:
4783         case CHIP_RAVEN:
4784         case CHIP_ARCTURUS:
4785         case CHIP_RENOIR:
4786                 gfx_v9_0_update_gfx_clock_gating(adev,
4787                                                  state == AMD_CG_STATE_GATE);
4788                 break;
4789         default:
4790                 break;
4791         }
4792         return 0;
4793 }
4794
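/*
 * Report the clock-gating features that are currently active by sampling the
 * RLC and CP gating registers and translating them into AMD_CG_SUPPORT_*
 * flags.  The 3D CGCG/CGLS bits are not reported on Arcturus.
 */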
4795 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4796 {
4797         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4798         int data;
4799
4800         if (amdgpu_sriov_vf(adev))
4801                 *flags = 0;
4802
4803         /* AMD_CG_SUPPORT_GFX_MGCG */
4804         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
4805         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4806                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4807
4808         /* AMD_CG_SUPPORT_GFX_CGCG */
4809         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
4810         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4811                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4812
4813         /* AMD_CG_SUPPORT_GFX_CGLS */
4814         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4815                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4816
4817         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4818         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
4819         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4820                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4821
4822         /* AMD_CG_SUPPORT_GFX_CP_LS */
4823         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
4824         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4825                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4826
4827         if (adev->asic_type != CHIP_ARCTURUS) {
4828                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4829                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
4830                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4831                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4832
4833                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4834                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4835                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4836         }
4837 }
4838
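/* rptr/wptr helpers for the gfx ring: the rptr is read from its writeback
 * slot (32 bits on gfx9), the wptr either from the doorbell writeback slot
 * or from CP_RB0_WPTR/CP_RB0_WPTR_HI when doorbells are not in use.
 */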
4839 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4840 {
4841         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4842 }
4843
4844 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4845 {
4846         struct amdgpu_device *adev = ring->adev;
4847         u64 wptr;
4848
4849         /* XXX check if swapping is necessary on BE */
4850         if (ring->use_doorbell) {
4851                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4852         } else {
4853                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4854                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4855         }
4856
4857         return wptr;
4858 }
4859
4860 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4861 {
4862         struct amdgpu_device *adev = ring->adev;
4863
4864         if (ring->use_doorbell) {
4865                 /* XXX check if swapping is necessary on BE */
4866                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4867                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4868         } else {
4869                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4870                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4871         }
4872 }
4873
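/*
 * Emit an HDP flush: write the per-ring reference value to the NBIO HDP
 * flush request register and WAIT_REG_MEM until the matching done bit is
 * set.  Compute rings derive their reference mask from me/pipe; the gfx
 * ring uses cp0 and performs the wait on the PFP engine.
 */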
4874 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4875 {
4876         struct amdgpu_device *adev = ring->adev;
4877         u32 ref_and_mask, reg_mem_engine;
4878         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4879
4880         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4881                 switch (ring->me) {
4882                 case 1:
4883                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4884                         break;
4885                 case 2:
4886                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4887                         break;
4888                 default:
4889                         return;
4890                 }
4891                 reg_mem_engine = 0;
4892         } else {
4893                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4894                 reg_mem_engine = 1; /* pfp */
4895         }
4896
4897         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4898                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4899                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4900                               ref_and_mask, ref_and_mask, 0x20);
4901 }
4902
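/*
 * Emit an indirect buffer on the gfx ring: INDIRECT_BUFFER_CONST for CE IBs,
 * INDIRECT_BUFFER otherwise, carrying the IB address, length in dwords and
 * the VMID.  For preemptible SR-IOV IBs the preemption-enable bit is set and
 * DE metadata is emitted ahead of the IB.
 */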
4903 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4904                                         struct amdgpu_job *job,
4905                                         struct amdgpu_ib *ib,
4906                                         uint32_t flags)
4907 {
4908         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4909         u32 header, control = 0;
4910
4911         if (ib->flags & AMDGPU_IB_FLAG_CE)
4912                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4913         else
4914                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4915
4916         control |= ib->length_dw | (vmid << 24);
4917
4918         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4919                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4920
4921                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4922                         gfx_v9_0_ring_emit_de_meta(ring);
4923         }
4924
4925         amdgpu_ring_write(ring, header);
4926         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4927         amdgpu_ring_write(ring,
4928 #ifdef __BIG_ENDIAN
4929                 (2 << 0) |
4930 #endif
4931                 lower_32_bits(ib->gpu_addr));
4932         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4933         amdgpu_ring_write(ring, control);
4934 }
4935
4936 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4937                                           struct amdgpu_job *job,
4938                                           struct amdgpu_ib *ib,
4939                                           uint32_t flags)
4940 {
4941         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4942         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4943
4944         /* Currently, there is a high likelihood of getting a wave ID mismatch
4945          * between ME and GDS, leading to a hw deadlock, because ME generates
4946          * different wave IDs than the GDS expects. This situation happens
4947          * randomly when at least 5 compute pipes use GDS ordered append.
4948          * The wave IDs generated by ME are also wrong after suspend/resume.
4949          * Those are probably bugs somewhere else in the kernel driver.
4950          *
4951          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4952          * GDS to 0 for this ring (me/pipe).
4953          */
4954         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4955                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4956                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4957                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4958         }
4959
4960         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4961         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4962         amdgpu_ring_write(ring,
4963 #ifdef __BIG_ENDIAN
4964                                 (2 << 0) |
4965 #endif
4966                                 lower_32_bits(ib->gpu_addr));
4967         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4968         amdgpu_ring_write(ring, control);
4969 }
4970
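/*
 * Emit a fence: a RELEASE_MEM EOP event that flushes/invalidates the GPU
 * caches (writeback-only when AMDGPU_FENCE_FLAG_TC_WB_ONLY is set), writes
 * the 32- or 64-bit sequence number to @addr and optionally raises an
 * interrupt.
 */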
4971 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4972                                      u64 seq, unsigned flags)
4973 {
4974         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4975         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4976         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4977
4978         /* RELEASE_MEM - flush caches, send int */
4979         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4980         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4981                                                EOP_TC_NC_ACTION_EN) :
4982                                               (EOP_TCL1_ACTION_EN |
4983                                                EOP_TC_ACTION_EN |
4984                                                EOP_TC_WB_ACTION_EN |
4985                                                EOP_TC_MD_ACTION_EN)) |
4986                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4987                                  EVENT_INDEX(5)));
4988         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4989
4990         /*
4991          * the address should be Qword aligned for a 64bit write, or Dword
4992          * aligned when only the low 32 bits are written (the high bits are discarded)
4993          */
4994         if (write64bit)
4995                 BUG_ON(addr & 0x7);
4996         else
4997                 BUG_ON(addr & 0x3);
4998         amdgpu_ring_write(ring, lower_32_bits(addr));
4999         amdgpu_ring_write(ring, upper_32_bits(addr));
5000         amdgpu_ring_write(ring, lower_32_bits(seq));
5001         amdgpu_ring_write(ring, upper_32_bits(seq));
5002         amdgpu_ring_write(ring, 0);
5003 }
5004
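/*
 * Pipeline sync: WAIT_REG_MEM on the ring's own fence address until the last
 * emitted sequence number has signalled.  The wait runs on PFP for the gfx
 * ring so that following packets are not fetched past it.
 */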
5005 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5006 {
5007         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5008         uint32_t seq = ring->fence_drv.sync_seq;
5009         uint64_t addr = ring->fence_drv.gpu_addr;
5010
5011         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5012                               lower_32_bits(addr), upper_32_bits(addr),
5013                               seq, 0xffffffff, 4);
5014 }
5015
5016 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5017                                         unsigned vmid, uint64_t pd_addr)
5018 {
5019         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5020
5021         /* compute doesn't have PFP */
5022         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5023                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5024                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5025                 amdgpu_ring_write(ring, 0x0);
5026         }
5027 }
5028
5029 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5030 {
5031         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5032 }
5033
5034 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5035 {
5036         u64 wptr;
5037
5038         /* XXX check if swapping is necessary on BE */
5039         if (ring->use_doorbell)
5040                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5041         else
5042                 BUG();
5043         return wptr;
5044 }
5045
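/*
 * Throttle or restore a single hardware pipe by programming its
 * SPI_WCL_PIPE_PERCENT register: the full VALUE mask when the pipe is
 * acquired/unrestricted, a minimal value otherwise.
 */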
5046 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5047                                            bool acquire)
5048 {
5049         struct amdgpu_device *adev = ring->adev;
5050         int pipe_num, tmp, reg;
5051         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5052
5053         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5054
5055         /* first me only has 2 entries, GFX and HP3D */
5056         if (ring->me > 0)
5057                 pipe_num -= 2;
5058
5059         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5060         tmp = RREG32(reg);
5061         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5062         WREG32(reg, tmp);
5063 }
5064
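/*
 * Track per-pipe reservations in pipe_reserve_bitmap.  While at least one
 * pipe is reserved, every ring whose pipe holds no reservation is throttled;
 * once the last reservation is dropped, all gfx and compute pipes are
 * restored to full throughput.
 */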
5065 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5066                                             struct amdgpu_ring *ring,
5067                                             bool acquire)
5068 {
5069         int i, pipe;
5070         bool reserve;
5071         struct amdgpu_ring *iring;
5072
5073         mutex_lock(&adev->gfx.pipe_reserve_mutex);
5074         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5075         if (acquire)
5076                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5077         else
5078                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5079
5080         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5081                 /* Clear all reservations - everyone reacquires all resources */
5082                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5083                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5084                                                        true);
5085
5086                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5087                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5088                                                        true);
5089         } else {
5090                 /* Lower all pipes without a current reservation */
5091                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5092                         iring = &adev->gfx.gfx_ring[i];
5093                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5094                                                            iring->me,
5095                                                            iring->pipe,
5096                                                            0);
5097                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5098                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5099                 }
5100
5101                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5102                         iring = &adev->gfx.compute_ring[i];
5103                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5104                                                            iring->me,
5105                                                            iring->pipe,
5106                                                            0);
5107                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5108                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5109                 }
5110         }
5111
5112         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5113 }
5114
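/*
 * Raise or drop the hardware priority of one HQD: select the queue through
 * SRBM and program CP_HQD_PIPE_PRIORITY / CP_HQD_QUEUE_PRIORITY accordingly.
 */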
5115 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5116                                       struct amdgpu_ring *ring,
5117                                       bool acquire)
5118 {
5119         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5120         uint32_t queue_priority = acquire ? 0xf : 0x0;
5121
5122         mutex_lock(&adev->srbm_mutex);
5123         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5124
5125         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5126         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5127
5128         soc15_grbm_select(adev, 0, 0, 0, 0);
5129         mutex_unlock(&adev->srbm_mutex);
5130 }
5131
5132 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5133                                                enum drm_sched_priority priority)
5134 {
5135         struct amdgpu_device *adev = ring->adev;
5136         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5137
5138         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5139                 return;
5140
5141         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5142         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5143 }
5144
5145 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5146 {
5147         struct amdgpu_device *adev = ring->adev;
5148
5149         /* XXX check if swapping is necessary on BE */
5150         if (ring->use_doorbell) {
5151                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5152                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5153         } else {
5154                 BUG(); /* only DOORBELL method supported on gfx9 now */
5155         }
5156 }
5157
5158 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5159                                          u64 seq, unsigned int flags)
5160 {
5161         struct amdgpu_device *adev = ring->adev;
5162
5163         /* we only allocate 32 bits for each seq wb address */
5164         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5165
5166         /* write fence seq to the "addr" */
5167         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5168         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5169                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5170         amdgpu_ring_write(ring, lower_32_bits(addr));
5171         amdgpu_ring_write(ring, upper_32_bits(addr));
5172         amdgpu_ring_write(ring, lower_32_bits(seq));
5173
5174         if (flags & AMDGPU_FENCE_FLAG_INT) {
5175                 /* set register to trigger INT */
5176                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5177                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5178                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5179                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5180                 amdgpu_ring_write(ring, 0);
5181                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5182         }
5183 }
5184
5185 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5186 {
5187         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5188         amdgpu_ring_write(ring, 0);
5189 }
5190
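/*
 * Write a cleared v9_ce_ib_state into the ce_payload slot of the CSA via
 * WRITE_DATA.  Together with gfx_v9_0_ring_emit_de_meta() below, this
 * provides the metadata area the CP uses when saving/restoring state for
 * preemption; both are currently only emitted under SR-IOV.
 */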
5191 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5192 {
5193         struct v9_ce_ib_state ce_payload = {0};
5194         uint64_t csa_addr;
5195         int cnt;
5196
5197         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5198         csa_addr = amdgpu_csa_vaddr(ring->adev);
5199
5200         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5201         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5202                                  WRITE_DATA_DST_SEL(8) |
5203                                  WR_CONFIRM) |
5204                                  WRITE_DATA_CACHE_POLICY(0));
5205         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5206         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5207         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5208 }
5209
5210 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5211 {
5212         struct v9_de_ib_state de_payload = {0};
5213         uint64_t csa_addr, gds_addr;
5214         int cnt;
5215
5216         csa_addr = amdgpu_csa_vaddr(ring->adev);
5217         gds_addr = csa_addr + 4096;
5218         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5219         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5220
5221         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5222         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5223         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5224                                  WRITE_DATA_DST_SEL(8) |
5225                                  WR_CONFIRM) |
5226                                  WRITE_DATA_CACHE_POLICY(0));
5227         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5228         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5229         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5230 }
5231
5232 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5233 {
5234         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5235         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0: frame_start, 1: frame_end */
5236 }
5237
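/*
 * Emit CONTEXT_CONTROL at the start of a submission: optionally write the CE
 * metadata (SR-IOV), mark the start of the frame, and tell the CP which
 * state categories (global/uconfig, per-context, SH regs, CE RAM) to load.
 */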
5238 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5239 {
5240         uint32_t dw2 = 0;
5241
5242         if (amdgpu_sriov_vf(ring->adev))
5243                 gfx_v9_0_ring_emit_ce_meta(ring);
5244
5245         gfx_v9_0_ring_emit_tmz(ring, true);
5246
5247         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5248         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5249                 /* set load_global_config & load_global_uconfig */
5250                 dw2 |= 0x8001;
5251                 /* set load_cs_sh_regs */
5252                 dw2 |= 0x01000000;
5253                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5254                 dw2 |= 0x10002;
5255
5256                 /* set load_ce_ram if preamble presented */
5257                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5258                         dw2 |= 0x10000000;
5259         } else {
5260                 /* still load_ce_ram if this is the first time a preamble is
5261                  * presented, even though no context switch happens.
5262                  */
5263                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5264                         dw2 |= 0x10000000;
5265         }
5266
5267         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5268         amdgpu_ring_write(ring, dw2);
5269         amdgpu_ring_write(ring, 0);
5270 }
5271
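/*
 * Conditional execution support: emit a COND_EXEC packet whose dword count
 * is a placeholder (0x55aa55aa) and return its offset in the ring; the
 * matching patch_cond_exec() later overwrites the placeholder with the
 * number of dwords to skip when *cond_exe_gpu_addr reads zero.
 */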
5272 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5273 {
5274         unsigned ret;
5275         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5276         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5277         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5278         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5279         ret = ring->wptr & ring->buf_mask;
5280         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5281         return ret;
5282 }
5283
5284 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5285 {
5286         unsigned cur;
5287         BUG_ON(offset > ring->buf_mask);
5288         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5289
5290         cur = (ring->wptr & ring->buf_mask) - 1;
5291         if (likely(cur > offset))
5292                 ring->ring[offset] = cur - offset;
5293         else
5294                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
5295 }
5296
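/*
 * KIQ register read: COPY_DATA from the register to the writeback slot at
 * kiq->reg_val_offs, from which the caller picks up the value.
 */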
5297 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5298 {
5299         struct amdgpu_device *adev = ring->adev;
5300         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
5301
5302         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5303         amdgpu_ring_write(ring, 0 |     /* src: register*/
5304                                 (5 << 8) |      /* dst: memory */
5305                                 (1 << 20));     /* write confirm */
5306         amdgpu_ring_write(ring, reg);
5307         amdgpu_ring_write(ring, 0);
5308         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5309                                 kiq->reg_val_offs * 4));
5310         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5311                                 kiq->reg_val_offs * 4));
5312 }
5313
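/*
 * Emit a register write as a WRITE_DATA packet; the engine and address
 * increment bits in the header depend on the ring type (PFP for gfx,
 * no-increment for KIQ, plain write-confirm otherwise).
 */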
5314 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5315                                     uint32_t val)
5316 {
5317         uint32_t cmd = 0;
5318
5319         switch (ring->funcs->type) {
5320         case AMDGPU_RING_TYPE_GFX:
5321                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5322                 break;
5323         case AMDGPU_RING_TYPE_KIQ:
5324                 cmd = (1 << 16); /* no inc addr */
5325                 break;
5326         default:
5327                 cmd = WR_CONFIRM;
5328                 break;
5329         }
5330         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5331         amdgpu_ring_write(ring, cmd);
5332         amdgpu_ring_write(ring, reg);
5333         amdgpu_ring_write(ring, 0);
5334         amdgpu_ring_write(ring, val);
5335 }
5336
5337 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5338                                         uint32_t val, uint32_t mask)
5339 {
5340         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5341 }
5342
5343 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5344                                                   uint32_t reg0, uint32_t reg1,
5345                                                   uint32_t ref, uint32_t mask)
5346 {
5347         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5348         struct amdgpu_device *adev = ring->adev;
5349         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5350                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5351
5352         if (fw_version_ok)
5353                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5354                                       ref, mask, 0x20);
5355         else
5356                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5357                                                            ref, mask);
5358 }
5359
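/*
 * Soft recovery: issue an SQ_CMD targeting the waves of @vmid (kill
 * request), giving a hung job a chance to be terminated without a full
 * GPU reset.
 */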
5360 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5361 {
5362         struct amdgpu_device *adev = ring->adev;
5363         uint32_t value = 0;
5364
5365         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5366         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5367         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5368         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5369         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5370 }
5371
5372 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5373                                                  enum amdgpu_interrupt_state state)
5374 {
5375         switch (state) {
5376         case AMDGPU_IRQ_STATE_DISABLE:
5377         case AMDGPU_IRQ_STATE_ENABLE:
5378                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5379                                TIME_STAMP_INT_ENABLE,
5380                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5381                 break;
5382         default:
5383                 break;
5384         }
5385 }
5386
5387 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5388                                                      int me, int pipe,
5389                                                      enum amdgpu_interrupt_state state)
5390 {
5391         u32 mec_int_cntl, mec_int_cntl_reg;
5392
5393         /*
5394          * amdgpu controls only the first MEC. That's why this function only
5395          * handles the setting of interrupts for this specific MEC. All other
5396          * pipes' interrupts are set by amdkfd.
5397          */
5398
5399         if (me == 1) {
5400                 switch (pipe) {
5401                 case 0:
5402                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5403                         break;
5404                 case 1:
5405                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5406                         break;
5407                 case 2:
5408                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5409                         break;
5410                 case 3:
5411                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5412                         break;
5413                 default:
5414                         DRM_DEBUG("invalid pipe %d\n", pipe);
5415                         return;
5416                 }
5417         } else {
5418                 DRM_DEBUG("invalid me %d\n", me);
5419                 return;
5420         }
5421
5422         switch (state) {
5423         case AMDGPU_IRQ_STATE_DISABLE:
5424                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5425                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5426                                              TIME_STAMP_INT_ENABLE, 0);
5427                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5428                 break;
5429         case AMDGPU_IRQ_STATE_ENABLE:
5430                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5431                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5432                                              TIME_STAMP_INT_ENABLE, 1);
5433                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5434                 break;
5435         default:
5436                 break;
5437         }
5438 }
5439
5440 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5441                                              struct amdgpu_irq_src *source,
5442                                              unsigned type,
5443                                              enum amdgpu_interrupt_state state)
5444 {
5445         switch (state) {
5446         case AMDGPU_IRQ_STATE_DISABLE:
5447         case AMDGPU_IRQ_STATE_ENABLE:
5448                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5449                                PRIV_REG_INT_ENABLE,
5450                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5451                 break;
5452         default:
5453                 break;
5454         }
5455
5456         return 0;
5457 }
5458
5459 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5460                                               struct amdgpu_irq_src *source,
5461                                               unsigned type,
5462                                               enum amdgpu_interrupt_state state)
5463 {
5464         switch (state) {
5465         case AMDGPU_IRQ_STATE_DISABLE:
5466         case AMDGPU_IRQ_STATE_ENABLE:
5467                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5468                                PRIV_INSTR_INT_ENABLE,
5469                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                     break;
5470         default:
5471                 break;
5472         }
5473
5474         return 0;
5475 }
5476
5477 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5478         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5479                         CP_ECC_ERROR_INT_ENABLE, 1)
5480
5481 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5482         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5483                         CP_ECC_ERROR_INT_ENABLE, 0)
5484
5485 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5486                                               struct amdgpu_irq_src *source,
5487                                               unsigned type,
5488                                               enum amdgpu_interrupt_state state)
5489 {
5490         switch (state) {
5491         case AMDGPU_IRQ_STATE_DISABLE:
5492                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5493                                 CP_ECC_ERROR_INT_ENABLE, 0);
5494                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5495                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5496                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5497                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5498                 break;
5499
5500         case AMDGPU_IRQ_STATE_ENABLE:
5501                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5502                                 CP_ECC_ERROR_INT_ENABLE, 1);
5503                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5504                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5505                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5506                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5507                 break;
5508         default:
5509                 break;
5510         }
5511
5512         return 0;
5513 }
5514
5515
5516 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5517                                             struct amdgpu_irq_src *src,
5518                                             unsigned type,
5519                                             enum amdgpu_interrupt_state state)
5520 {
5521         switch (type) {
5522         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5523                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5524                 break;
5525         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5526                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5527                 break;
5528         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5529                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5530                 break;
5531         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5532                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5533                 break;
5534         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5535                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5536                 break;
5537         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5538                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5539                 break;
5540         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5541                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5542                 break;
5543         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5544                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5545                 break;
5546         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5547                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5548                 break;
5549         default:
5550                 break;
5551         }
5552         return 0;
5553 }
5554
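/*
 * EOP interrupt handler: decode me/pipe/queue from the IV ring_id and run
 * fence processing on the matching gfx or compute ring.
 */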
5555 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5556                             struct amdgpu_irq_src *source,
5557                             struct amdgpu_iv_entry *entry)
5558 {
5559         int i;
5560         u8 me_id, pipe_id, queue_id;
5561         struct amdgpu_ring *ring;
5562
5563         DRM_DEBUG("IH: CP EOP\n");
5564         me_id = (entry->ring_id & 0x0c) >> 2;
5565         pipe_id = (entry->ring_id & 0x03) >> 0;
5566         queue_id = (entry->ring_id & 0x70) >> 4;
5567
5568         switch (me_id) {
5569         case 0:
5570                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5571                 break;
5572         case 1:
5573         case 2:
5574                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5575                         ring = &adev->gfx.compute_ring[i];
5576                         /* Per-queue interrupt is supported for MEC starting from VI.
5577                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5578                          */
5579                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5580                                 amdgpu_fence_process(ring);
5581                 }
5582                 break;
5583         }
5584         return 0;
5585 }
5586
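/*
 * Common handler for privileged register/instruction faults: locate the ring
 * that triggered the interrupt and signal a scheduler fault on it.
 */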
5587 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5588                            struct amdgpu_iv_entry *entry)
5589 {
5590         u8 me_id, pipe_id, queue_id;
5591         struct amdgpu_ring *ring;
5592         int i;
5593
5594         me_id = (entry->ring_id & 0x0c) >> 2;
5595         pipe_id = (entry->ring_id & 0x03) >> 0;
5596         queue_id = (entry->ring_id & 0x70) >> 4;
5597
5598         switch (me_id) {
5599         case 0:
5600                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5601                 break;
5602         case 1:
5603         case 2:
5604                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5605                         ring = &adev->gfx.compute_ring[i];
5606                         if (ring->me == me_id && ring->pipe == pipe_id &&
5607                             ring->queue == queue_id)
5608                                 drm_sched_fault(&ring->sched);
5609                 }
5610                 break;
5611         }
5612 }
5613
5614 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5615                                  struct amdgpu_irq_src *source,
5616                                  struct amdgpu_iv_entry *entry)
5617 {
5618         DRM_ERROR("Illegal register access in command stream\n");
5619         gfx_v9_0_fault(adev, entry);
5620         return 0;
5621 }
5622
5623 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5624                                   struct amdgpu_irq_src *source,
5625                                   struct amdgpu_iv_entry *entry)
5626 {
5627         DRM_ERROR("Illegal instruction in command stream\n");
5628         gfx_v9_0_fault(adev, entry);
5629         return 0;
5630 }
5631
5632
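/*
 * RAS/EDC error counter description table: each entry names a GC block
 * memory, the register holding its EDC counts, and the SEC (correctable)
 * and DED (uncorrectable) count fields; entries whose DED field is zero
 * only expose a single-error-detect (SED) count.
 */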
5633 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5634         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5635           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5636           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5637         },
5638         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5639           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5640           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5641         },
5642         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5643           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5644           0, 0
5645         },
5646         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5647           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5648           0, 0
5649         },
5650         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5651           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5652           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5653         },
5654         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5655           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5656           0, 0
5657         },
5658         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5659           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5660           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5661         },
5662         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5663           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5664           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5665         },
5666         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5667           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5668           0, 0
5669         },
5670         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5671           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5672           0, 0
5673         },
5674         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5675           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5676           0, 0
5677         },
5678         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5679           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5680           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5681         },
5682         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5683           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5684           0, 0
5685         },
5686         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5687           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5688           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5689         },
5690         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5691           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5692           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5693           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5694         },
5695         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5696           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5697           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5698           0, 0
5699         },
5700         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5701           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5702           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5703           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5704         },
5705         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5706           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5707           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5708           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5709         },
5710         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5711           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5712           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5713           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5714         },
5715         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5716           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5717           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5718           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5719         },
5720         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5721           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5722           0, 0
5723         },
5724         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5725           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5726           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5727         },
5728         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5729           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5730           0, 0
5731         },
5732         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5733           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5734           0, 0
5735         },
5736         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5737           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5738           0, 0
5739         },
5740         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5741           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5742           0, 0
5743         },
5744         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5745           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5746           0, 0
5747         },
5748         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5749           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5750           0, 0
5751         },
5752         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5753           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5754           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5755         },
5756         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5757           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5758           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5759         },
5760         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5761           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5762           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5763         },
5764         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5765           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5766           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5767         },
5768         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5769           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5770           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5771         },
5772         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5773           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5774           0, 0
5775         },
5776         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5777           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5778           0, 0
5779         },
5780         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5781           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5782           0, 0
5783         },
5784         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5785           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5786           0, 0
5787         },
5788         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5789           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5790           0, 0
5791         },
5792         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5793           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5794           0, 0
5795         },
5796         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5797           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5798           0, 0
5799         },
5800         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5801           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5802           0, 0
5803         },
5804         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5805           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5806           0, 0
5807         },
5808         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5809           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5810           0, 0
5811         },
5812         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5813           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5814           0, 0
5815         },
5816         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5817           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5818           0, 0
5819         },
5820         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5821           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5822           0, 0
5823         },
5824         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5825           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5826           0, 0
5827         },
5828         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5829           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5830           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5831         },
5832         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5833           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5834           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5835         },
5836         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5837           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5838           0, 0
5839         },
5840         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5841           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5842           0, 0
5843         },
5844         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5845           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5846           0, 0
5847         },
5848         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5849           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5850           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5851         },
5852         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5853           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5854           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5855         },
5856         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5857           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5858           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
5859         },
5860         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5861           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5862           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
5863         },
5864         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5865           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
5866           0, 0
5867         },
5868         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5869           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5870           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
5871         },
5872         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5873           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5874           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
5875         },
5876         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5877           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
5878           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
5879         },
5880         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5881           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5882           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
5883         },
5884         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5885           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5886           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
5887         },
5888         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5889           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5890           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
5891         },
5892         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5893           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5894           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
5895         },
5896         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5897           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5898           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
5899         },
5900         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5901           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5902           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
5903         },
5904         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5905           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5906           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
5907         },
5908         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5909           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5910           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
5911         },
5912         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5913           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5914           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
5915         },
5916         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5917           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5918           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
5919         },
5920         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5921           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5922           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
5923         },
5924         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5925           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5926           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
5927         },
5928         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5929           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5930           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
5931         },
5932         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5933           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5934           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
5935         },
5936         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5937           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5938           0, 0
5939         },
5940         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5941           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
5942           0, 0
5943         },
5944         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5945           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
5946           0, 0
5947         },
5948         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5949           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
5950           0, 0
5951         },
5952         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5953           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
5954           0, 0
5955         },
5956         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5957           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5958           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
5959         },
5960         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5961           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5962           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
5963         },
5964         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5965           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5966           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
5967         },
5968         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5969           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5970           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
5971         },
5972         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5973           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5974           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
5975         },
5976         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5977           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5978           0, 0
5979         },
5980         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5981           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
5982           0, 0
5983         },
5984         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5985           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
5986           0, 0
5987         },
5988         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5989           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
5990           0, 0
5991         },
5992         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5993           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
5994           0, 0
5995         },
5996         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5997           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5998           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
5999         },
6000         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6001           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6002           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6003         },
6004         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6005           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6006           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6007         },
6008         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6009           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6010           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6011         },
6012         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6013           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6014           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6015         },
6016         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6017           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6018           0, 0
6019         },
6020         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6021           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6022           0, 0
6023         },
6024         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6025           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6026           0, 0
6027         },
6028         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6029           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6030           0, 0
6031         },
6032         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6033           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6034           0, 0
6035         },
6036         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6037           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6038           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6039         },
6040         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6041           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6042           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6043         },
6044         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6045           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6046           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6047         },
6048         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6049           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6050           0, 0
6051         },
6052         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6053           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6054           0, 0
6055         },
6056         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6057           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6058           0, 0
6059         },
6060         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6061           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6062           0, 0
6063         },
6064         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6065           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6066           0, 0
6067         },
6068         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6069           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6070           0, 0
6071         }
6072 };
6073
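/*
 * Validate a RAS injection request against the GFX sub-block table and,
 * when the requested error type is supported by both the hardware and
 * the driver, forward it to the PSP RAS TA under the GRBM index lock.
 */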
6074 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6075                                      void *inject_if)
6076 {
6077         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6078         int ret;
6079         struct ta_ras_trigger_error_input block_info = { 0 };
6080
6081         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6082                 return -EINVAL;
6083
6084         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6085                 return -EINVAL;
6086
6087         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6088                 return -EPERM;
6089
6090         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6091               info->head.type)) {
6092                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6093                         ras_gfx_subblocks[info->head.sub_block_index].name,
6094                         info->head.type);
6095                 return -EPERM;
6096         }
6097
6098         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6099               info->head.type)) {
6100                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6101                         ras_gfx_subblocks[info->head.sub_block_index].name,
6102                         info->head.type);
6103                 return -EPERM;
6104         }
6105
6106         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6107         block_info.sub_block_index =
6108                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6109         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6110         block_info.address = info->address;
6111         block_info.value = info->value;
6112
6113         mutex_lock(&adev->grbm_idx_mutex);
6114         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6115         mutex_unlock(&adev->grbm_idx_mutex);
6116
6117         return ret;
6118 }
6119
6120 static const char *vml2_mems[] = {
6121         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6122         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6123         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6124         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6125         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6126         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6127         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6128         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6129         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6130         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6131         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6132         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6133         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6134         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6135         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6136         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6137 };
6138
6139 static const char *vml2_walker_mems[] = {
6140         "UTC_VML2_CACHE_PDE0_MEM0",
6141         "UTC_VML2_CACHE_PDE0_MEM1",
6142         "UTC_VML2_CACHE_PDE1_MEM0",
6143         "UTC_VML2_CACHE_PDE1_MEM1",
6144         "UTC_VML2_CACHE_PDE2_MEM0",
6145         "UTC_VML2_CACHE_PDE2_MEM1",
6146         "UTC_VML2_RDIF_LOG_FIFO",
6147 };
6148
6149 static const char *atc_l2_cache_2m_mems[] = {
6150         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6151         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6152         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6153         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6154 };
6155
6156 static const char *atc_l2_cache_4k_mems[] = {
6157         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6158         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6159         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6160         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6161         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6162         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6163         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6164         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6165         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6166         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6167         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6168         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6169         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6170         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6171         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6172         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6173         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6174         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6175         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6176         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6177         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6178         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6179         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6180         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6181         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6182         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6183         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6184         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6185         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6186         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6187         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6188         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6189 };
6190
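/*
 * Walk the VML2, VML2 walker and ATC L2 (2M/4K) EDC counters: reset the
 * index/count registers, read each instance's SEC/DED counts into
 * err_data, then leave the index registers at 255.
 */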
6191 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6192                                          struct ras_err_data *err_data)
6193 {
6194         uint32_t i, data;
6195         uint32_t sec_count, ded_count;
6196
6197         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6198         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6199         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6200         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6201         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6202         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6203         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6204         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6205
6206         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6207                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6208                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6209
6210                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6211                 if (sec_count) {
6212                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6213                                  vml2_mems[i], sec_count);
6214                         err_data->ce_count += sec_count;
6215                 }
6216
6217                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6218                 if (ded_count) {
6219                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6220                                  vml2_mems[i], ded_count);
6221                         err_data->ue_count += ded_count;
6222                 }
6223         }
6224
6225         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6226                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6227                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6228
6229                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6230                                                 SEC_COUNT);
6231                 if (sec_count) {
6232                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6233                                  vml2_walker_mems[i], sec_count);
6234                         err_data->ce_count += sec_count;
6235                 }
6236
6237                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6238                                                 DED_COUNT);
6239                 if (ded_count) {
6240                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6241                                  vml2_walker_mems[i], ded_count);
6242                         err_data->ue_count += ded_count;
6243                 }
6244         }
6245
6246         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6247                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6248                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6249
6250                 sec_count = (data & 0x00006000L) >> 0xd; /* SEC count: bits 14:13 */
6251                 if (sec_count) {
6252                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6253                                  atc_l2_cache_2m_mems[i], sec_count);
6254                         err_data->ce_count += sec_count;
6255                 }
6256         }
6257
6258         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6259                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6260                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6261
6262                 sec_count = (data & 0x00006000L) >> 0xd; /* SEC count: bits 14:13 */
6263                 if (sec_count) {
6264                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6265                                  atc_l2_cache_4k_mems[i], sec_count);
6266                         err_data->ce_count += sec_count;
6267                 }
6268
6269                 ded_count = (data & 0x00018000L) >> 0xf; /* DED count: bits 16:15 */
6270                 if (ded_count) {
6271                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6272                                  atc_l2_cache_4k_mems[i], ded_count);
6273                         err_data->ue_count += ded_count;
6274                 }
6275         }
6276
6277         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6278         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6279         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6280         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6281
6282         return 0;
6283 }
6284
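/*
 * Decode one EDC counter register value: match it against the
 * gfx_v9_0_ras_fields table, log every non-zero sub-block count and
 * accumulate the SEC/DED totals.
 */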
6285 static int gfx_v9_0_ras_error_count(const struct soc15_reg_entry *reg,
6286         uint32_t se_id, uint32_t inst_id, uint32_t value,
6287         uint32_t *sec_count, uint32_t *ded_count)
6288 {
6289         uint32_t i;
6290         uint32_t sec_cnt, ded_cnt;
6291
6292         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6293                 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6294                         gfx_v9_0_ras_fields[i].seg != reg->seg ||
6295                         gfx_v9_0_ras_fields[i].inst != reg->inst)
6296                         continue;
6297
6298                 sec_cnt = (value &
6299                                 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6300                                 gfx_v9_0_ras_fields[i].sec_count_shift;
6301                 if (sec_cnt) {
6302                         DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
6303                                 gfx_v9_0_ras_fields[i].name,
6304                                 se_id, inst_id,
6305                                 sec_cnt);
6306                         *sec_count += sec_cnt;
6307                 }
6308
6309                 ded_cnt = (value &
6310                                 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6311                                 gfx_v9_0_ras_fields[i].ded_count_shift;
6312                 if (ded_cnt) {
6313                         DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
6314                                 gfx_v9_0_ras_fields[i].name,
6315                                 se_id, inst_id,
6316                                 ded_cnt);
6317                         *ded_count += ded_cnt;
6318                 }
6319         }
6320
6321         return 0;
6322 }
6323
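/*
 * Clear all GFX EDC counters: the GC counters are cleared by reading
 * every SE/instance of each register, while the UTC/ATC L2 counters are
 * zeroed and then read back per index.
 */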
6324 static void gfx_v9_0_clear_ras_edc_counter(struct amdgpu_device *adev)
6325 {
6326         int i, j, k;
6327
6328         /* read back registers to clear the counters */
6329         mutex_lock(&adev->grbm_idx_mutex);
6330         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6331                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6332                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6333                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6334                                 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6335                         }
6336                 }
6337         }
6338         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6339         mutex_unlock(&adev->grbm_idx_mutex);
6340
6341         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6342         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6343         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6344         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6345         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6346         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6347         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6348         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6349
6350         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6351                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6352                 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6353         }
6354
6355         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6356                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6357                 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6358         }
6359
6360         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6361                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6362                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6363         }
6364
6365         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6366                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6367                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6368         }
6369
6370         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6371         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6372         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6373         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6374 }
6375
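/*
 * RAS error-count query entry point: iterate every SE/instance of each
 * GC EDC counter register, decode the counts via
 * gfx_v9_0_ras_error_count(), then add the UTC/ATC L2 counts collected
 * by gfx_v9_0_query_utc_edc_status().
 */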
6376 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6377                                           void *ras_error_status)
6378 {
6379         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6380         uint32_t sec_count = 0, ded_count = 0;
6381         uint32_t i, j, k;
6382         uint32_t reg_value;
6383
6384         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6385                 return -EINVAL;
6386
6387         err_data->ue_count = 0;
6388         err_data->ce_count = 0;
6389
6390         mutex_lock(&adev->grbm_idx_mutex);
6391
6392         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6393                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6394                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6395                                 gfx_v9_0_select_se_sh(adev, j, 0, k);
6396                                 reg_value =
6397                                         RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6398                                 if (reg_value)
6399                                         gfx_v9_0_ras_error_count(&gfx_v9_0_edc_counter_regs[i],
6400                                                         j, k, reg_value,
6401                                                         &sec_count, &ded_count);
6402                         }
6403                 }
6404         }
6405
6406         err_data->ce_count += sec_count;
6407         err_data->ue_count += ded_count;
6408
6409         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6410         mutex_unlock(&adev->grbm_idx_mutex);
6411
6412         gfx_v9_0_query_utc_edc_status(adev, err_data);
6413
6414         return 0;
6415 }
6416
6417 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6418         .name = "gfx_v9_0",
6419         .early_init = gfx_v9_0_early_init,
6420         .late_init = gfx_v9_0_late_init,
6421         .sw_init = gfx_v9_0_sw_init,
6422         .sw_fini = gfx_v9_0_sw_fini,
6423         .hw_init = gfx_v9_0_hw_init,
6424         .hw_fini = gfx_v9_0_hw_fini,
6425         .suspend = gfx_v9_0_suspend,
6426         .resume = gfx_v9_0_resume,
6427         .is_idle = gfx_v9_0_is_idle,
6428         .wait_for_idle = gfx_v9_0_wait_for_idle,
6429         .soft_reset = gfx_v9_0_soft_reset,
6430         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6431         .set_powergating_state = gfx_v9_0_set_powergating_state,
6432         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6433 };
6434
6435 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6436         .type = AMDGPU_RING_TYPE_GFX,
6437         .align_mask = 0xff,
6438         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6439         .support_64bit_ptrs = true,
6440         .vmhub = AMDGPU_GFXHUB_0,
6441         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6442         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6443         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6444         .emit_frame_size = /* 242 maximum in total, assuming 16 IBs */
6445                 5 +  /* COND_EXEC */
6446                 7 +  /* PIPELINE_SYNC */
6447                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6448                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6449                 2 + /* VM_FLUSH */
6450                 8 +  /* FENCE for VM_FLUSH */
6451                 20 + /* GDS switch */
6452                 4 + /* double SWITCH_BUFFER,
6453                        the first COND_EXEC jumps to the place just
6454                        prior to this double SWITCH_BUFFER */
6455                 5 + /* COND_EXEC */
6456                 7 +  /* HDP_flush */
6457                 4 +  /* VGT_flush */
6458                 14 + /* CE_META */
6459                 31 + /* DE_META */
6460                 3 + /* CNTX_CTRL */
6461                 5 + /* HDP_INVL */
6462                 8 + 8 + /* FENCE x2 */
6463                 2, /* SWITCH_BUFFER */
6464         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6465         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6466         .emit_fence = gfx_v9_0_ring_emit_fence,
6467         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6468         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6469         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6470         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6471         .test_ring = gfx_v9_0_ring_test_ring,
6472         .test_ib = gfx_v9_0_ring_test_ib,
6473         .insert_nop = amdgpu_ring_insert_nop,
6474         .pad_ib = amdgpu_ring_generic_pad_ib,
6475         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6476         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6477         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6478         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6479         .emit_tmz = gfx_v9_0_ring_emit_tmz,
6480         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6481         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6482         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6483         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6484 };
6485
6486 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6487         .type = AMDGPU_RING_TYPE_COMPUTE,
6488         .align_mask = 0xff,
6489         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6490         .support_64bit_ptrs = true,
6491         .vmhub = AMDGPU_GFXHUB_0,
6492         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6493         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6494         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6495         .emit_frame_size =
6496                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6497                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6498                 5 + /* hdp invalidate */
6499                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6500                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6501                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6502                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6503                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6504         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6505         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6506         .emit_fence = gfx_v9_0_ring_emit_fence,
6507         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6508         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6509         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6510         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6511         .test_ring = gfx_v9_0_ring_test_ring,
6512         .test_ib = gfx_v9_0_ring_test_ib,
6513         .insert_nop = amdgpu_ring_insert_nop,
6514         .pad_ib = amdgpu_ring_generic_pad_ib,
6515         .set_priority = gfx_v9_0_ring_set_priority_compute,
6516         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6517         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6518         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6519 };
6520
6521 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6522         .type = AMDGPU_RING_TYPE_KIQ,
6523         .align_mask = 0xff,
6524         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6525         .support_64bit_ptrs = true,
6526         .vmhub = AMDGPU_GFXHUB_0,
6527         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6528         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6529         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6530         .emit_frame_size =
6531                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6532                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6533                 5 + /* hdp invalidate */
6534                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6535                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6536                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6537                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6538                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6539         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6540         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6541         .test_ring = gfx_v9_0_ring_test_ring,
6542         .insert_nop = amdgpu_ring_insert_nop,
6543         .pad_ib = amdgpu_ring_generic_pad_ib,
6544         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6545         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6546         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6547         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6548 };
6549
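/* Hook up the KIQ, GFX and compute ring function tables. */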
6550 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6551 {
6552         int i;
6553
6554         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6555
6556         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6557                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6558
6559         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6560                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6561 }
6562
6563 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6564         .set = gfx_v9_0_set_eop_interrupt_state,
6565         .process = gfx_v9_0_eop_irq,
6566 };
6567
6568 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6569         .set = gfx_v9_0_set_priv_reg_fault_state,
6570         .process = gfx_v9_0_priv_reg_irq,
6571 };
6572
6573 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6574         .set = gfx_v9_0_set_priv_inst_fault_state,
6575         .process = gfx_v9_0_priv_inst_irq,
6576 };
6577
6578 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6579         .set = gfx_v9_0_set_cp_ecc_error_state,
6580         .process = amdgpu_gfx_cp_ecc_error_irq,
6581 };
6582
6583
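/*
 * Register the EOP, privileged register/instruction fault and CP ECC
 * error interrupt sources.
 */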
6584 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6585 {
6586         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6587         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6588
6589         adev->gfx.priv_reg_irq.num_types = 1;
6590         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6591
6592         adev->gfx.priv_inst_irq.num_types = 1;
6593         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6594
6595         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6596         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6597 }
6598
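/* All supported gfx9 ASICs share the same RLC function table. */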
6599 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6600 {
6601         switch (adev->asic_type) {
6602         case CHIP_VEGA10:
6603         case CHIP_VEGA12:
6604         case CHIP_VEGA20:
6605         case CHIP_RAVEN:
6606         case CHIP_ARCTURUS:
6607         case CHIP_RENOIR:
6608                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6609                 break;
6610         default:
6611                 break;
6612         }
6613 }
6614
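/*
 * Set the per-ASIC GDS, GWS and OA sizes along with the maximum compute
 * wave id allowed to use GDS.
 */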
6615 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6616 {
6617         /* init asic gds info */
6618         switch (adev->asic_type) {
6619         case CHIP_VEGA10:
6620         case CHIP_VEGA12:
6621         case CHIP_VEGA20:
6622                 adev->gds.gds_size = 0x10000;
6623                 break;
6624         case CHIP_RAVEN:
6625         case CHIP_ARCTURUS:
6626                 adev->gds.gds_size = 0x1000;
6627                 break;
6628         default:
6629                 adev->gds.gds_size = 0x10000;
6630                 break;
6631         }
6632
6633         switch (adev->asic_type) {
6634         case CHIP_VEGA10:
6635         case CHIP_VEGA20:
6636                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6637                 break;
6638         case CHIP_VEGA12:
6639                 adev->gds.gds_compute_max_wave_id = 0x27f;
6640                 break;
6641         case CHIP_RAVEN:
6642                 if (adev->rev_id >= 0x8)
6643                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6644                 else
6645                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6646                 break;
6647         case CHIP_ARCTURUS:
6648                 adev->gds.gds_compute_max_wave_id = 0xfff;
6649                 break;
6650         default:
6651                 /* this really depends on the chip */
6652                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6653                 break;
6654         }
6655
6656         adev->gds.gws_size = 64;
6657         adev->gds.oa_size = 16;
6658 }
6659
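/* Apply a user-requested CU disable bitmap for the currently selected SE/SH. */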
6660 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6661                                                  u32 bitmap)
6662 {
6663         u32 data;
6664
6665         if (!bitmap)
6666                 return;
6667
6668         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6669         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6670
6671         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6672 }
6673
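/*
 * Return the bitmap of active CUs for the currently selected SE/SH;
 * CUs marked inactive in either the fuse or user config are cleared.
 */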
6674 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6675 {
6676         u32 data, mask;
6677
6678         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6679         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6680
6681         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6682         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6683
6684         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6685
6686         return (~data) & mask;
6687 }
6688
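/*
 * Fill cu_info with the per-SE/SH active CU bitmaps, the total active
 * CU count and the always-on CU mask.
 */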
6689 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6690                                  struct amdgpu_cu_info *cu_info)
6691 {
6692         int i, j, k, counter, active_cu_number = 0;
6693         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6694         unsigned disable_masks[4 * 4];
6695
6696         if (!adev || !cu_info)
6697                 return -EINVAL;
6698
6699         /*
6700          * The 4*4 bitmap array gives 16 entries, enough to cover all gfx9 ASICs.
6701          */
6702         if (adev->gfx.config.max_shader_engines *
6703                 adev->gfx.config.max_sh_per_se > 16)
6704                 return -EINVAL;
6705
6706         amdgpu_gfx_parse_disable_cu(disable_masks,
6707                                     adev->gfx.config.max_shader_engines,
6708                                     adev->gfx.config.max_sh_per_se);
6709
6710         mutex_lock(&adev->grbm_idx_mutex);
6711         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6712                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6713                         mask = 1;
6714                         ao_bitmap = 0;
6715                         counter = 0;
6716                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6717                         gfx_v9_0_set_user_cu_inactive_bitmap(
6718                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6719                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6720
6721                         /*
6722                          * The bitmap (and ao_cu_bitmap) in the cu_info structure
6723                          * is a 4x4 array, which suits Vega ASICs and their 4*2
6724                          * SE/SH layout.
6725                          * Arcturus, however, uses an 8*1 SE/SH layout.
6726                          * To minimize the impact, fold it into the existing
6727                          * bitmap array as follows:
6728                          *    SE4,SH0 --> bitmap[0][1]
6729                          *    SE5,SH0 --> bitmap[1][1]
6730                          *    SE6,SH0 --> bitmap[2][1]
6731                          *    SE7,SH0 --> bitmap[3][1]
6732                          */
6733                         cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6734
6735                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6736                                 if (bitmap & mask) {
6737                                         if (counter < adev->gfx.config.max_cu_per_sh)
6738                                                 ao_bitmap |= mask;
6739                                         counter++;
6740                                 }
6741                                 mask <<= 1;
6742                         }
6743                         active_cu_number += counter;
6744                         if (i < 2 && j < 2)
6745                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6746                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6747                 }
6748         }
6749         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6750         mutex_unlock(&adev->grbm_idx_mutex);
6751
6752         cu_info->number = active_cu_number;
6753         cu_info->ao_cu_mask = ao_cu_mask;
6754         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6755
6756         return 0;
6757 }
6758
6759 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6760 {
6761         .type = AMD_IP_BLOCK_TYPE_GFX,
6762         .major = 9,
6763         .minor = 0,
6764         .rev = 0,
6765         .funcs = &gfx_v9_0_ip_funcs,
6766 };