/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS                                  0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

#define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0

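/*
 * Location of one GFX RAS error counter register plus the mask/shift pairs
 * used to extract its SEC (correctable) and DED (uncorrectable) counts.
 */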
struct ras_gfx_subblock_reg {
        const char *name;
        uint32_t hwip;
        uint32_t inst;
        uint32_t seg;
        uint32_t reg_offset;
        uint32_t sec_count_mask;
        uint32_t sec_count_shift;
        uint32_t ded_count_mask;
        uint32_t ded_count_shift;
};

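/*
 * GFX RAS sub-block index space; the *_INDEX_START/*_INDEX_END entries
 * bracket the range of sub-blocks belonging to each hardware block.
 */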
enum ta_ras_gfx_subblock {
        /*CPC*/
        TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
        TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
        TA_RAS_BLOCK__GFX_CPC_UCODE,
        TA_RAS_BLOCK__GFX_DC_STATE_ME1,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
        TA_RAS_BLOCK__GFX_DC_STATE_ME2,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        /* CPF*/
        TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
        TA_RAS_BLOCK__GFX_CPF_TAG,
        TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
        /* CPG*/
        TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
        TA_RAS_BLOCK__GFX_CPG_TAG,
        TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
        /* GDS*/
        TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        /* SPI*/
        TA_RAS_BLOCK__GFX_SPI_SR_MEM,
        /* SQ*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_LDS_D,
        TA_RAS_BLOCK__GFX_SQ_LDS_I,
        TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
        /* SQC (3 ranges)*/
        TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        /* SQC range 0*/
        TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
                TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        /* SQC range 1*/
        TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        /* SQC range 2*/
        TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
        /* TA*/
        TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
        TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        /* TCA*/
        TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        /* TCC (5 sub-ranges)*/
        TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        /* TCC range 0*/
        TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
        TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        /* TCC range 1*/
        TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
                TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        /* TCC range 2*/
        TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
        TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
        TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
                TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        /* TCC range 3*/
        TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
                TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        /* TCC range 4*/
        TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
                TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
                TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
        /* TCI*/
        TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
        /* TCP*/
        TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
        TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
        TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
        TA_RAS_BLOCK__GFX_TCP_DB_RAM,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        /* TD*/
        TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
        TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        /* EA (3 sub-ranges)*/
        TA_RAS_BLOCK__GFX_EA_INDEX_START,
        /* EA range 0*/
        TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        /* EA range 1*/
        TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        /* EA range 2*/
        TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
        /* UTC VM L2 bank*/
        TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
        /* UTC VM walker*/
        TA_RAS_BLOCK__UTC_VML2_WALKER,
        /* UTC ATC L2 2MB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
        /* UTC ATC L2 4KB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
        TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
        unsigned char *name;
        int ta_subblock;
        int hw_supported_error_type;
        int sw_supported_error_type;
};

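/*
 * Build one ras_gfx_subblocks[] entry: flags a-d are packed into
 * hw_supported_error_type and flags e-h into sw_supported_error_type.
 */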
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
        [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
                #subblock,                                                     \
                TA_RAS_BLOCK__##subblock,                                      \
                ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
                (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
        }

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

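/*
 * Golden register settings, applied through soc15_program_register_sequence():
 * each entry names a GC register, the mask of bits to update and the value to
 * program into them.
 */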
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
};

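/*
 * Offsets of the RLC SRM index control address/data register pairs, relative
 * to the respective *_0 register.
 */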
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
                                          void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
                                     void *inject_if);

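/* Program the per-ASIC golden register settings. */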
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_VEGA10:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg10,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg10));
                break;
        case CHIP_VEGA12:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1_vg12,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
                break;
        case CHIP_VEGA20:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg20,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg20));
                break;
        case CHIP_ARCTURUS:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_4_1_arct,
                                                ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
                break;
        case CHIP_RAVEN:
                soc15_program_register_sequence(adev, golden_settings_gc_9_1,
                                                ARRAY_SIZE(golden_settings_gc_9_1));
                if (adev->rev_id >= 8)
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv2,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv2));
                else
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv1,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv1));
                break;
        case CHIP_RENOIR:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_1_rn,
                                                ARRAY_SIZE(golden_settings_gc_9_1_rn));
                return; /* Renoir does not need the common golden settings */
        default:
                break;
        }

        if (adev->asic_type != CHIP_ARCTURUS)
                soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
                                                (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

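/* Set up the pool of scratch registers used by the ring tests. */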
static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
        adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

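/*
 * Emit a WRITE_DATA packet that writes @val to register @reg, optionally
 * requesting write confirmation.
 */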
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
                                       bool wc, uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
                                WRITE_DATA_DST_SEL(0) |
                                (wc ? WR_CONFIRM : 0));
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, val);
}

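/*
 * Emit a WAIT_REG_MEM packet that polls a register or memory location until
 * (value & mask) == ref.
 */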
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
                                  int mem_space, int opt, uint32_t addr0,
                                  uint32_t addr1, uint32_t ref, uint32_t mask,
                                  uint32_t inv)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring,
                                 /* memory (1) or register (0) */
                                 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
                                 WAIT_REG_MEM_OPERATION(opt) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
                                 WAIT_REG_MEM_ENGINE(eng_sel)));

        if (mem_space)
                BUG_ON(addr0 & 0x3); /* Dword align */
        amdgpu_ring_write(ring, addr0);
        amdgpu_ring_write(ring, addr1);
        amdgpu_ring_write(ring, ref);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, inv); /* poll interval */
}

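/*
 * Basic ring test: write 0xDEADBEEF to a scratch register through the ring
 * and poll until the value shows up.
 */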
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r)
                return r;

        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r)
                goto error_free_scratch;

        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                udelay(1);
        }

        if (i >= adev->usec_timeout)
                r = -ETIMEDOUT;

error_free_scratch:
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

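/*
 * IB test: submit an indirect buffer that writes 0xDEADBEEF to a writeback
 * slot, wait for the fence, then check that the value landed.
 */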
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;

        unsigned index;
        uint64_t gpu_addr;
        uint32_t tmp;
        long r;

        r = amdgpu_device_wb_get(adev, &index);
        if (r)
                return r;

        gpu_addr = adev->wb.gpu_addr + (index * 4);
        adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 16, &ib);
        if (r)
                goto err1;

        ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
        ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
        ib.ptr[2] = lower_32_bits(gpu_addr);
        ib.ptr[3] = upper_32_bits(gpu_addr);
        ib.ptr[4] = 0xDEADBEEF;
        ib.length_dw = 5;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err2;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                goto err2;
        }

        tmp = adev->wb.wb[index];
        if (tmp == 0xDEADBEEF)
                r = 0;
        else
                r = -EINVAL;

err2:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
err1:
        amdgpu_device_wb_free(adev, index);
        return r;
}

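/* Release all CP/RLC firmware images and the RLC register list. */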
static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}

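/*
 * Parse the v2.1 RLC firmware header and record the save/restore list
 * locations, sizes and versions.
 */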
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
        const struct rlc_firmware_header_v2_1 *rlc_hdr;

        rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
        adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
        adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
        adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
        adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
        adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
        adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
        adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
        adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
        adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
        adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
        adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
        adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
        adev->gfx.rlc.reg_list_format_direct_reg_list_length =
                        le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

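/*
 * Determine whether the CP ME/MEC firmware is new enough to support the
 * register write-then-wait sequence (the GRBM 1-cycle delay handling the
 * warning below refers to).
 */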
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
        adev->gfx.me_fw_write_wait = false;
        adev->gfx.mec_fw_write_wait = false;

        if ((adev->gfx.mec_fw_version < 0x000001a5) ||
            (adev->gfx.mec_feature_version < 46) ||
            (adev->gfx.pfp_fw_version < 0x000000b7) ||
            (adev->gfx.pfp_feature_version < 46))
                DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize "
                              "GRBM requires 1-cycle delay in cp firmware\n");
995
996         switch (adev->asic_type) {
997         case CHIP_VEGA10:
998                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
999                     (adev->gfx.me_feature_version >= 42) &&
1000                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1001                     (adev->gfx.pfp_feature_version >= 42))
1002                         adev->gfx.me_fw_write_wait = true;
1003
1004                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1005                     (adev->gfx.mec_feature_version >= 42))
1006                         adev->gfx.mec_fw_write_wait = true;
1007                 break;
1008         case CHIP_VEGA12:
1009                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1010                     (adev->gfx.me_feature_version >= 44) &&
1011                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1012                     (adev->gfx.pfp_feature_version >= 44))
1013                         adev->gfx.me_fw_write_wait = true;
1014
1015                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1016                     (adev->gfx.mec_feature_version >= 44))
1017                         adev->gfx.mec_fw_write_wait = true;
1018                 break;
1019         case CHIP_VEGA20:
1020                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1021                     (adev->gfx.me_feature_version >= 44) &&
1022                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1023                     (adev->gfx.pfp_feature_version >= 44))
1024                         adev->gfx.me_fw_write_wait = true;
1025
1026                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1027                     (adev->gfx.mec_feature_version >= 44))
1028                         adev->gfx.mec_fw_write_wait = true;
1029                 break;
1030         case CHIP_RAVEN:
1031                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1032                     (adev->gfx.me_feature_version >= 42) &&
1033                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1034                     (adev->gfx.pfp_feature_version >= 42))
1035                         adev->gfx.me_fw_write_wait = true;
1036
1037                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1038                     (adev->gfx.mec_feature_version >= 42))
1039                         adev->gfx.mec_fw_write_wait = true;
1040                 break;
1041         default:
1042                 break;
1043         }
1044 }
1045
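/*
 * Disable GFXOFF on early Raven parts whose RLC firmware cannot support it,
 * and enable the GFX powergating flags when GFXOFF remains allowed.
 */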
1046 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1047 {
1048         switch (adev->asic_type) {
1049         case CHIP_VEGA10:
1050         case CHIP_VEGA12:
1051         case CHIP_VEGA20:
1052                 break;
1053         case CHIP_RAVEN:
1054                 if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) &&
1055                     ((adev->gfx.rlc_fw_version != 106 &&
1056                       adev->gfx.rlc_fw_version < 531) ||
1057                      (adev->gfx.rlc_fw_version == 53815) ||
1058                      (adev->gfx.rlc_feature_version < 1) ||
1059                      !adev->gfx.rlc.is_rlc_v2_1))
1060                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1061
1062                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1063                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1064                                 AMD_PG_SUPPORT_CP |
1065                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1066                 break;
1067         case CHIP_RENOIR:
1068                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1069                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1070                                 AMD_PG_SUPPORT_CP |
1071                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1072                 break;
1073         default:
1074                 break;
1075         }
1076 }
1077
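/*
 * Request and validate the PFP, ME and CE microcode for the graphics pipe,
 * and register the images for PSP (front-door) loading when that path is used.
 */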
1078 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1079                                           const char *chip_name)
1080 {
1081         char fw_name[30];
1082         int err;
1083         struct amdgpu_firmware_info *info = NULL;
1084         const struct common_firmware_header *header = NULL;
1085         const struct gfx_firmware_header_v1_0 *cp_hdr;
1086
1087         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1088         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1089         if (err)
1090                 goto out;
1091         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1092         if (err)
1093                 goto out;
1094         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1095         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1096         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1097
1098         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1099         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1100         if (err)
1101                 goto out;
1102         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1103         if (err)
1104                 goto out;
1105         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1106         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1107         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1108
1109         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1110         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1111         if (err)
1112                 goto out;
1113         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1114         if (err)
1115                 goto out;
1116         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1117         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1118         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1119
1120         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1121                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1122                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1123                 info->fw = adev->gfx.pfp_fw;
1124                 header = (const struct common_firmware_header *)info->fw->data;
1125                 adev->firmware.fw_size +=
1126                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1127
1128                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1129                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1130                 info->fw = adev->gfx.me_fw;
1131                 header = (const struct common_firmware_header *)info->fw->data;
1132                 adev->firmware.fw_size +=
1133                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1134
1135                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1136                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1137                 info->fw = adev->gfx.ce_fw;
1138                 header = (const struct common_firmware_header *)info->fw->data;
1139                 adev->firmware.fw_size +=
1140                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1141         }
1142
1143 out:
1144         if (err) {
1145                 dev_err(adev->dev,
1146                         "gfx9: Failed to load firmware \"%s\"\n",
1147                         fw_name);
1148                 release_firmware(adev->gfx.pfp_fw);
1149                 adev->gfx.pfp_fw = NULL;
1150                 release_firmware(adev->gfx.me_fw);
1151                 adev->gfx.me_fw = NULL;
1152                 release_firmware(adev->gfx.ce_fw);
1153                 adev->gfx.ce_fw = NULL;
1154         }
1155         return err;
1156 }
1157
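/*
 * Request the RLC microcode (selecting the AM4 or kicker variant where
 * required), parse its header and copy out the register save/restore lists.
 */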
1158 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1159                                           const char *chip_name)
1160 {
1161         char fw_name[30];
1162         int err;
1163         struct amdgpu_firmware_info *info = NULL;
1164         const struct common_firmware_header *header = NULL;
1165         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1166         unsigned int *tmp = NULL;
1167         unsigned int i = 0;
1168         uint16_t version_major;
1169         uint16_t version_minor;
1170         uint32_t smu_version;
1171
1172         /*
1173          * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1174          * instead of picasso_rlc.bin.
1175          * Judgment method:
1176          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1177          *          or revision >= 0xD8 && revision <= 0xDF
1178          * otherwise is PCO FP5
1179          */
1180         if (!strcmp(chip_name, "picasso") &&
1181                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1182                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1183                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1184         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1185                 (smu_version >= 0x41e2b))
1186                 /* SMC is loaded by the SBIOS on APUs, so the SMU version
1187                  * can be queried directly to pick the kicker RLC firmware.
1188                  */
1189                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1190         else
1191                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1192         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1193         if (err)
1194                 goto out;
1195         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
             if (err)
                     goto out;
1196         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1197
1198         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1199         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1200         if (version_major == 2 && version_minor == 1)
1201                 adev->gfx.rlc.is_rlc_v2_1 = true;
1202
1203         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1204         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1205         adev->gfx.rlc.save_and_restore_offset =
1206                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1207         adev->gfx.rlc.clear_state_descriptor_offset =
1208                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1209         adev->gfx.rlc.avail_scratch_ram_locations =
1210                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1211         adev->gfx.rlc.reg_restore_list_size =
1212                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1213         adev->gfx.rlc.reg_list_format_start =
1214                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1215         adev->gfx.rlc.reg_list_format_separate_start =
1216                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1217         adev->gfx.rlc.starting_offsets_start =
1218                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1219         adev->gfx.rlc.reg_list_format_size_bytes =
1220                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1221         adev->gfx.rlc.reg_list_size_bytes =
1222                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1223         adev->gfx.rlc.register_list_format =
1224                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1225                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1226         if (!adev->gfx.rlc.register_list_format) {
1227                 err = -ENOMEM;
1228                 goto out;
1229         }
1230
1231         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1232                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1233         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1234                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1235
1236         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1237
1238         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1239                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1240         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1241                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1242
1243         if (adev->gfx.rlc.is_rlc_v2_1)
1244                 gfx_v9_0_init_rlc_ext_microcode(adev);
1245
1246         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1247                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1248                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1249                 info->fw = adev->gfx.rlc_fw;
1250                 header = (const struct common_firmware_header *)info->fw->data;
1251                 adev->firmware.fw_size +=
1252                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1253
1254                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1255                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1256                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1257                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1258                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1259                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1260                         info->fw = adev->gfx.rlc_fw;
1261                         adev->firmware.fw_size +=
1262                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1263
1264                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1265                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1266                         info->fw = adev->gfx.rlc_fw;
1267                         adev->firmware.fw_size +=
1268                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1269
1270                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1271                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1272                         info->fw = adev->gfx.rlc_fw;
1273                         adev->firmware.fw_size +=
1274                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1275                 }
1276         }
1277
1278 out:
1279         if (err) {
1280                 dev_err(adev->dev,
1281                         "gfx9: Failed to load firmware \"%s\"\n",
1282                         fw_name);
1283                 release_firmware(adev->gfx.rlc_fw);
1284                 adev->gfx.rlc_fw = NULL;
1285         }
1286         return err;
1287 }
1288
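/*
 * Request and validate the MEC microcode; MEC2 firmware is optional and is
 * simply skipped when its image is not available.
 */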
1289 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1290                                           const char *chip_name)
1291 {
1292         char fw_name[30];
1293         int err;
1294         struct amdgpu_firmware_info *info = NULL;
1295         const struct common_firmware_header *header = NULL;
1296         const struct gfx_firmware_header_v1_0 *cp_hdr;
1297
1298         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1299         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1300         if (err)
1301                 goto out;
1302         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1303         if (err)
1304                 goto out;
1305         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1306         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1307         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1308
1310         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1311         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1312         if (!err) {
1313                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1314                 if (err)
1315                         goto out;
1316                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1317                                 adev->gfx.mec2_fw->data;
1318                 adev->gfx.mec2_fw_version =
1319                                 le32_to_cpu(cp_hdr->header.ucode_version);
1320                 adev->gfx.mec2_feature_version =
1321                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1322         } else {
1323                 err = 0;
1324                 adev->gfx.mec2_fw = NULL;
1325         }
1326
1327         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1328                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1329                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1330                 info->fw = adev->gfx.mec_fw;
1331                 header = (const struct common_firmware_header *)info->fw->data;
1332                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1333                 adev->firmware.fw_size +=
1334                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1335
1336                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1337                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1338                 info->fw = adev->gfx.mec_fw;
1339                 adev->firmware.fw_size +=
1340                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1341
1342                 if (adev->gfx.mec2_fw) {
1343                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1344                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1345                         info->fw = adev->gfx.mec2_fw;
1346                         header = (const struct common_firmware_header *)info->fw->data;
1347                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1348                         adev->firmware.fw_size +=
1349                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1350
1351                         /* TODO: Determine if MEC2 JT FW loading can be
1352                          * removed for all GFX v9 ASICs and above. */
1353                         if (adev->asic_type != CHIP_ARCTURUS &&
1354                             adev->asic_type != CHIP_RENOIR) {
1355                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1356                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1357                                 info->fw = adev->gfx.mec2_fw;
1358                                 adev->firmware.fw_size +=
1359                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1360                                         PAGE_SIZE);
1361                         }
1362                 }
1363         }
1364
1365 out:
1366         gfx_v9_0_check_if_need_gfxoff(adev);
1367         gfx_v9_0_check_fw_write_wait(adev);
1368         if (err) {
1369                 dev_err(adev->dev,
1370                         "gfx9: Failed to load firmware \"%s\"\n",
1371                         fw_name);
1372                 release_firmware(adev->gfx.mec_fw);
1373                 adev->gfx.mec_fw = NULL;
1374                 release_firmware(adev->gfx.mec2_fw);
1375                 adev->gfx.mec2_fw = NULL;
1376         }
1377         return err;
1378 }
1379
1380 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1381 {
1382         const char *chip_name;
1383         int r;
1384
1385         DRM_DEBUG("\n");
1386
1387         switch (adev->asic_type) {
1388         case CHIP_VEGA10:
1389                 chip_name = "vega10";
1390                 break;
1391         case CHIP_VEGA12:
1392                 chip_name = "vega12";
1393                 break;
1394         case CHIP_VEGA20:
1395                 chip_name = "vega20";
1396                 break;
1397         case CHIP_RAVEN:
1398                 if (adev->rev_id >= 8)
1399                         chip_name = "raven2";
1400                 else if (adev->pdev->device == 0x15d8)
1401                         chip_name = "picasso";
1402                 else
1403                         chip_name = "raven";
1404                 break;
1405         case CHIP_ARCTURUS:
1406                 chip_name = "arcturus";
1407                 break;
1408         case CHIP_RENOIR:
1409                 chip_name = "renoir";
1410                 break;
1411         default:
1412                 BUG();
1413         }
1414
1415         /* No CPG in Arcturus */
1416         if (adev->asic_type != CHIP_ARCTURUS) {
1417                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1418                 if (r)
1419                         return r;
1420         }
1421
1422         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1423         if (r)
1424                 return r;
1425
1426         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1427         if (r)
1428                 return r;
1429
1430         return r;
1431 }
1432
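/* Return the number of dwords needed for the clear-state buffer built below. */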
1433 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1434 {
1435         u32 count = 0;
1436         const struct cs_section_def *sect = NULL;
1437         const struct cs_extent_def *ext = NULL;
1438
1439         /* begin clear state */
1440         count += 2;
1441         /* context control state */
1442         count += 3;
1443
1444         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1445                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1446                         if (sect->id == SECT_CONTEXT)
1447                                 count += 2 + ext->reg_count;
1448                         else
1449                                 return 0;
1450                 }
1451         }
1452
1453         /* end clear state */
1454         count += 2;
1455         /* clear state */
1456         count += 2;
1457
1458         return count;
1459 }
1460
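/*
 * Fill the clear-state buffer with PM4 packets that program the context
 * registers from the gfx9 clear-state tables.
 */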
1461 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1462                                     volatile u32 *buffer)
1463 {
1464         u32 count = 0, i;
1465         const struct cs_section_def *sect = NULL;
1466         const struct cs_extent_def *ext = NULL;
1467
1468         if (adev->gfx.rlc.cs_data == NULL)
1469                 return;
1470         if (buffer == NULL)
1471                 return;
1472
1473         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1474         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1475
1476         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1477         buffer[count++] = cpu_to_le32(0x80000000);
1478         buffer[count++] = cpu_to_le32(0x80000000);
1479
1480         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1481                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1482                         if (sect->id == SECT_CONTEXT) {
1483                                 buffer[count++] =
1484                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1485                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1486                                                 PACKET3_SET_CONTEXT_REG_START);
1487                                 for (i = 0; i < ext->reg_count; i++)
1488                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1489                         } else {
1490                                 return;
1491                         }
1492                 }
1493         }
1494
1495         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1496         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1497
1498         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1499         buffer[count++] = cpu_to_le32(0);
1500 }
1501
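/*
 * Program the always-on CU masks used by RLC load balancing and powergating:
 * walk every SE/SH and keep the first always_on_cu_num active CUs enabled.
 */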
1502 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1503 {
1504         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1505         uint32_t pg_always_on_cu_num = 2;
1506         uint32_t always_on_cu_num;
1507         uint32_t i, j, k;
1508         uint32_t mask, cu_bitmap, counter;
1509
1510         if (adev->flags & AMD_IS_APU)
1511                 always_on_cu_num = 4;
1512         else if (adev->asic_type == CHIP_VEGA12)
1513                 always_on_cu_num = 8;
1514         else
1515                 always_on_cu_num = 12;
1516
1517         mutex_lock(&adev->grbm_idx_mutex);
1518         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1519                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1520                         mask = 1;
1521                         cu_bitmap = 0;
1522                         counter = 0;
1523                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1524
1525                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1526                                 if (cu_info->bitmap[i][j] & mask) {
1527                                         if (counter == pg_always_on_cu_num)
1528                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1529                                         if (counter < always_on_cu_num)
1530                                                 cu_bitmap |= mask;
1531                                         else
1532                                                 break;
1533                                         counter++;
1534                                 }
1535                                 mask <<= 1;
1536                         }
1537
1538                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1539                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1540                 }
1541         }
1542         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1543         mutex_unlock(&adev->grbm_idx_mutex);
1544 }
1545
1546 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1547 {
1548         uint32_t data;
1549
1550         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1551         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1552         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1553         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1554         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1555
1556         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1557         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1558
1559         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1560         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1561
1562         mutex_lock(&adev->grbm_idx_mutex);
1563         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1564         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1565         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1566
1567         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1568         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1569         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1570         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1571         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1572
1573         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1574         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1575         data &= 0x0000FFFF;
1576         data |= 0x00C00000;
1577         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1578
1579         /*
1580          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1581          * programmed in gfx_v9_0_init_always_on_cu_mask()
1582          */
1583
1584         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1585          * but is used here for RLC_LB_CNTL configuration */
1586         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1587         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1588         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1589         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1590         mutex_unlock(&adev->grbm_idx_mutex);
1591
1592         gfx_v9_0_init_always_on_cu_mask(adev);
1593 }
1594
1595 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1596 {
1597         uint32_t data;
1598
1599         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1600         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1601         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1602         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1603         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1604
1605         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1606         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1607
1608         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1609         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1610
1611         mutex_lock(&adev->grbm_idx_mutex);
1612         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1613         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1614         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1615
1616         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1617         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1618         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1619         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1620         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1621
1622         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1623         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1624         data &= 0x0000FFFF;
1625         data |= 0x00C00000;
1626         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1627
1628         /*
1629          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1630          * programmed in gfx_v9_0_init_always_on_cu_mask()
1631          */
1632
1633         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1634          * but is used here for RLC_LB_CNTL configuration */
1635         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1636         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1637         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1638         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1639         mutex_unlock(&adev->grbm_idx_mutex);
1640
1641         gfx_v9_0_init_always_on_cu_mask(adev);
1642 }
1643
1644 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1645 {
1646         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1647 }
1648
1649 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1650 {
1651         return 5;
1652 }
1653
1654 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1655 {
1656         const struct cs_section_def *cs_data;
1657         int r;
1658
1659         adev->gfx.rlc.cs_data = gfx9_cs_data;
1660
1661         cs_data = adev->gfx.rlc.cs_data;
1662
1663         if (cs_data) {
1664                 /* init clear state block */
1665                 r = amdgpu_gfx_rlc_init_csb(adev);
1666                 if (r)
1667                         return r;
1668         }
1669
1670         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1671                 /* TODO: double check the cp_table_size for RV */
1672                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1673                 r = amdgpu_gfx_rlc_init_cpt(adev);
1674                 if (r)
1675                         return r;
1676         }
1677
1678         switch (adev->asic_type) {
1679         case CHIP_RAVEN:
1680                 gfx_v9_0_init_lbpw(adev);
1681                 break;
1682         case CHIP_VEGA20:
1683                 gfx_v9_4_init_lbpw(adev);
1684                 break;
1685         default:
1686                 break;
1687         }
1688
1689         return 0;
1690 }
1691
1692 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1693 {
1694         int r;
1695
1696         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1697         if (unlikely(r != 0))
1698                 return r;
1699
1700         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1701                         AMDGPU_GEM_DOMAIN_VRAM);
1702         if (!r)
1703                 adev->gfx.rlc.clear_state_gpu_addr =
1704                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1705
1706         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1707
1708         return r;
1709 }
1710
1711 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1712 {
1713         int r;
1714
1715         if (!adev->gfx.rlc.clear_state_obj)
1716                 return;
1717
1718         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1719         if (likely(r == 0)) {
1720                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1721                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1722         }
1723 }
1724
1725 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1726 {
1727         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1728         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1729 }
1730
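/*
 * Allocate the HPD EOP buffer for the acquired compute queues and stage the
 * MEC firmware image in a GTT buffer object.
 */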
1731 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1732 {
1733         int r;
1734         u32 *hpd;
1735         const __le32 *fw_data;
1736         unsigned fw_size;
1737         u32 *fw;
1738         size_t mec_hpd_size;
1739
1740         const struct gfx_firmware_header_v1_0 *mec_hdr;
1741
1742         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1743
1744         /* take ownership of the relevant compute queues */
1745         amdgpu_gfx_compute_queue_acquire(adev);
1746         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1747
1748         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1749                                       AMDGPU_GEM_DOMAIN_VRAM,
1750                                       &adev->gfx.mec.hpd_eop_obj,
1751                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1752                                       (void **)&hpd);
1753         if (r) {
1754                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1755                 gfx_v9_0_mec_fini(adev);
1756                 return r;
1757         }
1758
1759         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1760
1761         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1762         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1763
1764         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1765
1766         fw_data = (const __le32 *)
1767                 (adev->gfx.mec_fw->data +
1768                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1769         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1770
1771         r = amdgpu_bo_create_reserved(adev, le32_to_cpu(mec_hdr->header.ucode_size_bytes),
1772                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1773                                       &adev->gfx.mec.mec_fw_obj,
1774                                       &adev->gfx.mec.mec_fw_gpu_addr,
1775                                       (void **)&fw);
1776         if (r) {
1777                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1778                 gfx_v9_0_mec_fini(adev);
1779                 return r;
1780         }
1781
1782         memcpy(fw, fw_data, fw_size);
1783
1784         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1785         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1786
1787         return 0;
1788 }
1789
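/* Read one dword from a wave's indexed SQ register space via SQ_IND_INDEX/DATA. */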
1790 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1791 {
1792         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1793                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1794                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1795                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1796                 (SQ_IND_INDEX__FORCE_READ_MASK));
1797         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1798 }
1799
1800 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1801                            uint32_t wave, uint32_t thread,
1802                            uint32_t regno, uint32_t num, uint32_t *out)
1803 {
1804         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1805                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1806                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1807                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1808                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1809                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1810                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1811         while (num--)
1812                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1813 }
1814
1815 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1816 {
1817         /* type 1 wave data */
1818         dst[(*no_fields)++] = 1;
1819         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1820         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1821         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1822         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1823         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1824         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1825         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1826         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1827         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1828         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1829         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1830         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1831         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1832         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1833 }
1834
1835 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1836                                      uint32_t wave, uint32_t start,
1837                                      uint32_t size, uint32_t *dst)
1838 {
1839         wave_read_regs(
1840                 adev, simd, wave, 0,
1841                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1842 }
1843
1844 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1845                                      uint32_t wave, uint32_t thread,
1846                                      uint32_t start, uint32_t size,
1847                                      uint32_t *dst)
1848 {
1849         wave_read_regs(
1850                 adev, simd, wave, thread,
1851                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1852 }
1853
1854 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1855                                   u32 me, u32 pipe, u32 q, u32 vm)
1856 {
1857         soc15_grbm_select(adev, me, pipe, q, vm);
1858 }
1859
1860 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1861         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1862         .select_se_sh = &gfx_v9_0_select_se_sh,
1863         .read_wave_data = &gfx_v9_0_read_wave_data,
1864         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1865         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1866         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1867         .ras_error_inject = &gfx_v9_0_ras_error_inject,
1868         .query_ras_error_count = &gfx_v9_0_query_ras_error_count
1869 };
1870
1871 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1872 {
1873         u32 gb_addr_config;
1874         int err;
1875
1876         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1877
1878         switch (adev->asic_type) {
1879         case CHIP_VEGA10:
1880                 adev->gfx.config.max_hw_contexts = 8;
1881                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1882                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1883                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1884                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1885                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1886                 break;
1887         case CHIP_VEGA12:
1888                 adev->gfx.config.max_hw_contexts = 8;
1889                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1890                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1891                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1892                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1893                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1894                 DRM_INFO("fix gfx.config for vega12\n");
1895                 break;
1896         case CHIP_VEGA20:
1897                 adev->gfx.config.max_hw_contexts = 8;
1898                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1899                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1900                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1901                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1902                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1903                 gb_addr_config &= ~0xf3e777ff;
1904                 gb_addr_config |= 0x22014042;
1905                 /* check vbios table if gpu info is not available */
1906                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1907                 if (err)
1908                         return err;
1909                 break;
1910         case CHIP_RAVEN:
1911                 adev->gfx.config.max_hw_contexts = 8;
1912                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1913                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1914                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1915                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1916                 if (adev->rev_id >= 8)
1917                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1918                 else
1919                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1920                 break;
1921         case CHIP_ARCTURUS:
1922                 adev->gfx.config.max_hw_contexts = 8;
1923                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1924                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1925                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1926                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1927                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1928                 gb_addr_config &= ~0xf3e777ff;
1929                 gb_addr_config |= 0x22014042;
1930                 break;
1931         case CHIP_RENOIR:
1932                 adev->gfx.config.max_hw_contexts = 8;
1933                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1934                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1935                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1936                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1937                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1938                 gb_addr_config &= ~0xf3e777ff;
1939                 gb_addr_config |= 0x22010042;
1940                 break;
1941         default:
1942                 BUG();
1943                 break;
1944         }
1945
1946         adev->gfx.config.gb_addr_config = gb_addr_config;
1947
1948         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1949                         REG_GET_FIELD(
1950                                         adev->gfx.config.gb_addr_config,
1951                                         GB_ADDR_CONFIG,
1952                                         NUM_PIPES);
1953
1954         adev->gfx.config.max_tile_pipes =
1955                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1956
1957         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1958                         REG_GET_FIELD(
1959                                         adev->gfx.config.gb_addr_config,
1960                                         GB_ADDR_CONFIG,
1961                                         NUM_BANKS);
1962         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1963                         REG_GET_FIELD(
1964                                         adev->gfx.config.gb_addr_config,
1965                                         GB_ADDR_CONFIG,
1966                                         MAX_COMPRESSED_FRAGS);
1967         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1968                         REG_GET_FIELD(
1969                                         adev->gfx.config.gb_addr_config,
1970                                         GB_ADDR_CONFIG,
1971                                         NUM_RB_PER_SE);
1972         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1973                         REG_GET_FIELD(
1974                                         adev->gfx.config.gb_addr_config,
1975                                         GB_ADDR_CONFIG,
1976                                         NUM_SHADER_ENGINES);
1977         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1978                         REG_GET_FIELD(
1979                                         adev->gfx.config.gb_addr_config,
1980                                         GB_ADDR_CONFIG,
1981                                         PIPE_INTERLEAVE_SIZE));
1982
1983         return 0;
1984 }
1985
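/*
 * Set up one compute ring: map it onto the given MEC/pipe/queue, point it at
 * its HPD EOP slot and hook it up to the corresponding EOP interrupt source.
 */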
1986 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1987                                       int mec, int pipe, int queue)
1988 {
1989         int r;
1990         unsigned irq_type;
1991         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1994
1995         /* mec0 is me1 */
1996         ring->me = mec + 1;
1997         ring->pipe = pipe;
1998         ring->queue = queue;
1999
2000         ring->ring_obj = NULL;
2001         ring->use_doorbell = true;
2002         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2003         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2004                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2005         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2006
2007         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2008                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2009                 + ring->pipe;
2010
2011         /* type-2 packets are deprecated on MEC, use type-3 instead */
2012         r = amdgpu_ring_init(adev, ring, 1024,
2013                              &adev->gfx.eop_irq, irq_type);
2014         if (r)
2015                 return r;
2016
2018         return 0;
2019 }
2020
2021 static int gfx_v9_0_sw_init(void *handle)
2022 {
2023         int i, j, k, r, ring_id;
2024         struct amdgpu_ring *ring;
2025         struct amdgpu_kiq *kiq;
2026         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2027
2028         switch (adev->asic_type) {
2029         case CHIP_VEGA10:
2030         case CHIP_VEGA12:
2031         case CHIP_VEGA20:
2032         case CHIP_RAVEN:
2033         case CHIP_ARCTURUS:
2034         case CHIP_RENOIR:
2035                 adev->gfx.mec.num_mec = 2;
2036                 break;
2037         default:
2038                 adev->gfx.mec.num_mec = 1;
2039                 break;
2040         }
2041
2042         adev->gfx.mec.num_pipe_per_mec = 4;
2043         adev->gfx.mec.num_queue_per_pipe = 8;
2044
2045         /* EOP Event */
2046         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2047         if (r)
2048                 return r;
2049
2050         /* Privileged reg */
2051         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2052                               &adev->gfx.priv_reg_irq);
2053         if (r)
2054                 return r;
2055
2056         /* Privileged inst */
2057         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2058                               &adev->gfx.priv_inst_irq);
2059         if (r)
2060                 return r;
2061
2062         /* ECC error */
2063         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2064                               &adev->gfx.cp_ecc_error_irq);
2065         if (r)
2066                 return r;
2067
2068         /* FUE error */
2069         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2070                               &adev->gfx.cp_ecc_error_irq);
2071         if (r)
2072                 return r;
2073
2074         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2075
2076         gfx_v9_0_scratch_init(adev);
2077
2078         r = gfx_v9_0_init_microcode(adev);
2079         if (r) {
2080                 DRM_ERROR("Failed to load gfx firmware!\n");
2081                 return r;
2082         }
2083
2084         r = adev->gfx.rlc.funcs->init(adev);
2085         if (r) {
2086                 DRM_ERROR("Failed to init rlc BOs!\n");
2087                 return r;
2088         }
2089
2090         r = gfx_v9_0_mec_init(adev);
2091         if (r) {
2092                 DRM_ERROR("Failed to init MEC BOs!\n");
2093                 return r;
2094         }
2095
2096         /* set up the gfx ring */
2097         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2098                 ring = &adev->gfx.gfx_ring[i];
2099                 ring->ring_obj = NULL;
2100                 if (!i)
2101                         sprintf(ring->name, "gfx");
2102                 else
2103                         sprintf(ring->name, "gfx_%d", i);
2104                 ring->use_doorbell = true;
2105                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2106                 r = amdgpu_ring_init(adev, ring, 1024,
2107                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2108                 if (r)
2109                         return r;
2110         }
2111
2112         /* set up the compute queues - allocate horizontally across pipes */
2113         ring_id = 0;
2114         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2115                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2116                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2117                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2118                                         continue;
2119
2120                                 r = gfx_v9_0_compute_ring_init(adev,
2121                                                                ring_id,
2122                                                                i, k, j);
2123                                 if (r)
2124                                         return r;
2125
2126                                 ring_id++;
2127                         }
2128                 }
2129         }
2130
2131         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2132         if (r) {
2133                 DRM_ERROR("Failed to init KIQ BOs!\n");
2134                 return r;
2135         }
2136
2137         kiq = &adev->gfx.kiq;
2138         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2139         if (r)
2140                 return r;
2141
2142         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2143         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2144         if (r)
2145                 return r;
2146
2147         adev->gfx.ce_ram_size = 0x8000;
2148
2149         r = gfx_v9_0_gpu_early_init(adev);
2150         if (r)
2151                 return r;
2152
2153         return 0;
2154 }
2155
2156
2157 static int gfx_v9_0_sw_fini(void *handle)
2158 {
2159         int i;
2160         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2161
2162         amdgpu_gfx_ras_fini(adev);
2163
2164         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2165                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2166         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2167                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2168
2169         amdgpu_gfx_mqd_sw_fini(adev);
2170         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2171         amdgpu_gfx_kiq_fini(adev);
2172
2173         gfx_v9_0_mec_fini(adev);
2174         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2175         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2176                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2177                                 &adev->gfx.rlc.cp_table_gpu_addr,
2178                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2179         }
2180         gfx_v9_0_free_microcode(adev);
2181
2182         return 0;
2183 }
2184
2185
2186 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2187 {
2188         /* TODO */
2189 }
2190
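/*
 * Select a shader engine / shader array / instance for subsequent register
 * accesses by programming GRBM_GFX_INDEX; 0xffffffff selects broadcast mode.
 */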
2191 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2192 {
2193         u32 data;
2194
2195         if (instance == 0xffffffff)
2196                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2197         else
2198                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2199
2200         if (se_num == 0xffffffff)
2201                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2202         else
2203                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2204
2205         if (sh_num == 0xffffffff)
2206                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2207         else
2208                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2209
2210         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2211 }
2212
2213 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2214 {
2215         u32 data, mask;
2216
2217         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2218         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2219
2220         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2221         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2222
2223         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2224                                          adev->gfx.config.max_sh_per_se);
2225
2226         return (~data) & mask;
2227 }
2228
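/* Build the bitmap of active render backends across all shader engines/arrays. */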
2229 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2230 {
2231         int i, j;
2232         u32 data;
2233         u32 active_rbs = 0;
2234         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2235                                         adev->gfx.config.max_sh_per_se;
2236
2237         mutex_lock(&adev->grbm_idx_mutex);
2238         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2239                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2240                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2241                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2242                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2243                                                rb_bitmap_width_per_sh);
2244                 }
2245         }
2246         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2247         mutex_unlock(&adev->grbm_idx_mutex);
2248
2249         adev->gfx.config.backend_enable_mask = active_rbs;
2250         adev->gfx.config.num_rbs = hweight32(active_rbs);
2251 }
2252
2253 #define DEFAULT_SH_MEM_BASES    (0x6000)
2254 #define FIRST_COMPUTE_VMID      (8)
2255 #define LAST_COMPUTE_VMID       (16)
2256 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2257 {
2258         int i;
2259         uint32_t sh_mem_config;
2260         uint32_t sh_mem_bases;
2261
2262         /*
2263          * Configure apertures:
2264          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2265          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2266          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2267          */
2268         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2269
2270         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2271                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2272                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2273
2274         mutex_lock(&adev->srbm_mutex);
2275         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2276                 soc15_grbm_select(adev, 0, 0, 0, i);
2277                 /* CP and shaders */
2278                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2279                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2280         }
2281         soc15_grbm_select(adev, 0, 0, 0, 0);
2282         mutex_unlock(&adev->srbm_mutex);
2283
2284         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2285            access. These should be enabled by FW for target VMIDs. */
2286         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2287                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2288                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2289                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2290                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2291         }
2292 }
2293
2294 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2295 {
2296         int vmid;
2297
2298         /*
2299          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2300          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2301          * the driver can enable them for graphics. VMID0 should maintain
2302          * access so that HWS firmware can save/restore entries.
2303          */
2304         for (vmid = 1; vmid < 16; vmid++) {
2305                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2306                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2307                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2308                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2309         }
2310 }
2311
2312 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2313 {
2314         u32 tmp;
2315         int i;
2316
2317         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2318
2319         gfx_v9_0_tiling_mode_table_init(adev);
2320
2321         gfx_v9_0_setup_rb(adev);
2322         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2323         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2324
2325         /* XXX SH_MEM regs */
2326         /* where to put LDS, scratch, GPUVM in FSA64 space */
2327         mutex_lock(&adev->srbm_mutex);
2328         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2329                 soc15_grbm_select(adev, 0, 0, 0, i);
2330                 /* CP and shaders */
2331                 if (i == 0) {
2332                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2333                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2334                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2335                                             !!amdgpu_noretry);
2336                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2337                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2338                 } else {
2339                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2340                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2341                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2342                                             !!amdgpu_noretry);
2343                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2344                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2345                                 (adev->gmc.private_aperture_start >> 48));
2346                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2347                                 (adev->gmc.shared_aperture_start >> 48));
2348                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2349                 }
2350         }
2351         soc15_grbm_select(adev, 0, 0, 0, 0);
2352
2353         mutex_unlock(&adev->srbm_mutex);
2354
2355         gfx_v9_0_init_compute_vmid(adev);
2356         gfx_v9_0_init_gds_vmid(adev);
2357 }
2358
2359 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2360 {
2361         u32 i, j, k;
2362         u32 mask;
2363
2364         mutex_lock(&adev->grbm_idx_mutex);
2365         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2366                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2367                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2368                         for (k = 0; k < adev->usec_timeout; k++) {
2369                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2370                                         break;
2371                                 udelay(1);
2372                         }
2373                         if (k == adev->usec_timeout) {
2374                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2375                                                       0xffffffff, 0xffffffff);
2376                                 mutex_unlock(&adev->grbm_idx_mutex);
2377                                 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2378                                          i, j);
2379                                 return;
2380                         }
2381                 }
2382         }
2383         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2384         mutex_unlock(&adev->grbm_idx_mutex);
2385
2386         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2387                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2388                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2389                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2390         for (k = 0; k < adev->usec_timeout; k++) {
2391                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2392                         break;
2393                 udelay(1);
2394         }
2395 }
2396
2397 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2398                                                bool enable)
2399 {
2400         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2401
2402         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2403         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2404         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2405         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2406
2407         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2408 }
2409
2410 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2411 {
2412         /* csib */
2413         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2414                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2415         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2416                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2417         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2418                         adev->gfx.rlc.clear_state_size);
2419 }
2420
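/*
 * Walk the RLC register_list_format table.  As inferred from the loop below
 * (the exact firmware layout is not documented here), each indirect block
 * starts at an offset recorded in indirect_start_offsets[] and consists of
 * 3-dword entries (two payload dwords followed by an indirect register
 * offset), terminated by a 0xFFFFFFFF sentinel; the distinct indirect
 * register offsets are collected into unique_indirect_regs[].
 */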
2421 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2422                                 int indirect_offset,
2423                                 int list_size,
2424                                 int *unique_indirect_regs,
2425                                 int unique_indirect_reg_count,
2426                                 int *indirect_start_offsets,
2427                                 int *indirect_start_offsets_count,
2428                                 int max_start_offsets_count)
2429 {
2430         int idx;
2431
2432         for (; indirect_offset < list_size; indirect_offset++) {
2433                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2434                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2435                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2436
2437                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2438                         indirect_offset += 2;
2439
2440                         /* look for the matching index */
2441                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2442                                 if (unique_indirect_regs[idx] ==
2443                                         register_list_format[indirect_offset] ||
2444                                         !unique_indirect_regs[idx])
2445                                         break;
2446                         }
2447
2448                         BUG_ON(idx >= unique_indirect_reg_count);
2449
2450                         if (!unique_indirect_regs[idx])
2451                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2452
2453                         indirect_offset++;
2454                 }
2455         }
2456 }
2457
2458 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2459 {
2460         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2461         int unique_indirect_reg_count = 0;
2462
2463         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2464         int indirect_start_offsets_count = 0;
2465
2466         int list_size = 0;
2467         int i = 0, j = 0;
2468         u32 tmp = 0;
2469
2470         u32 *register_list_format =
2471                 kmemdup(adev->gfx.rlc.register_list_format,
2472                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2473         if (!register_list_format)
2474                 return -ENOMEM;
2475
2476         /* setup unique_indirect_regs array and indirect_start_offsets array */
2477         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2478         gfx_v9_1_parse_ind_reg_list(register_list_format,
2479                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2480                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2481                                     unique_indirect_regs,
2482                                     unique_indirect_reg_count,
2483                                     indirect_start_offsets,
2484                                     &indirect_start_offsets_count,
2485                                     ARRAY_SIZE(indirect_start_offsets));
2486
2487         /* enable auto inc in case it is disabled */
2488         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2489         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2490         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2491
2492         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2493         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2494                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
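        /*
         * AUTO_INCR_ADDR (enabled above) advances the ARAM address on every
         * ARAM_DATA write, so the whole restore table streams out from
         * offset 0 without rewriting RLC_SRM_ARAM_ADDR.
         */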
2495         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2496                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2497                         adev->gfx.rlc.register_restore[i]);
2498
2499         /* load indirect register */
2500         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2501                 adev->gfx.rlc.reg_list_format_start);
2502
2503         /* direct register portion */
2504         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2505                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2506                         register_list_format[i]);
2507
2508         /* indirect register portion */
2509         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2510                 if (register_list_format[i] == 0xFFFFFFFF) {
2511                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2512                         continue;
2513                 }
2514
2515                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2516                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2517
2518                 for (j = 0; j < unique_indirect_reg_count; j++) {
2519                         if (register_list_format[i] == unique_indirect_regs[j]) {
2520                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2521                                 break;
2522                         }
2523                 }
2524
2525                 BUG_ON(j >= unique_indirect_reg_count);
2526
2527                 i++;
2528         }
2529
2530         /* set save/restore list size */
2531         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2532         list_size = list_size >> 1;
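        /*
         * The dword count is halved, presumably because the save/restore
         * table holds (offset, value) pairs and the RLC expects the number
         * of entries rather than the number of dwords.
         */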
2533         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2534                 adev->gfx.rlc.reg_restore_list_size);
2535         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2536
2537         /* write the starting offsets to RLC scratch ram */
2538         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2539                 adev->gfx.rlc.starting_offsets_start);
2540         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2541                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2542                        indirect_start_offsets[i]);
2543
2544         /* load unique indirect regs*/
2545         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2546                 if (unique_indirect_regs[i] != 0) {
2547                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2548                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2549                                unique_indirect_regs[i] & 0x3FFFF);
2550
2551                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2552                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2553                                unique_indirect_regs[i] >> 20);
2554                 }
2555         }
2556
2557         kfree(register_list_format);
2558         return 0;
2559 }
2560
2561 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2562 {
2563         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2564 }
2565
2566 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2567                                              bool enable)
2568 {
2569         uint32_t data = 0;
2570         uint32_t default_data = 0;
2571
2572         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2573         if (enable) {
2574                 /* enable GFXIP control over CGPG */
2575                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2576                 if (default_data != data)
2577                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2578
2579                 /* update status */
2580                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2581                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2582                 if (default_data != data)
2583                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2584         } else {
2585                 /* restore GFXIP control over CGPG */
2586                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2587                 if (default_data != data)
2588                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2589         }
2590 }
2591
2592 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2593 {
2594         uint32_t data = 0;
2595
2596         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2597                               AMD_PG_SUPPORT_GFX_SMG |
2598                               AMD_PG_SUPPORT_GFX_DMG)) {
2599                 /* init IDLE_POLL_COUNT = 0x60 */
2600                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2601                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2602                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2603                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2604
2605                 /* init RLC PG Delay */
2606                 data = 0;
2607                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2608                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2609                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2610                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2611                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2612
2613                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2614                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2615                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2616                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2617
2618                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2619                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2620                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2621                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2622
2623                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2624                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2625
2626                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2627                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2628                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2629
2630                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2631         }
2632 }
2633
2634 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2635                                                 bool enable)
2636 {
2637         uint32_t data = 0;
2638         uint32_t default_data = 0;
2639
2640         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2641         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2642                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2643                              enable ? 1 : 0);
2644         if (default_data != data)
2645                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2646 }
2647
2648 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2649                                                 bool enable)
2650 {
2651         uint32_t data = 0;
2652         uint32_t default_data = 0;
2653
2654         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2655         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2656                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2657                              enable ? 1 : 0);
2658         if (default_data != data)
2659                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2660 }
2661
2662 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2663                                         bool enable)
2664 {
2665         uint32_t data = 0;
2666         uint32_t default_data = 0;
2667
2668         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2669         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2670                              CP_PG_DISABLE,
2671                              enable ? 0 : 1);
2672         if (default_data != data)
2673                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2674 }
2675
2676 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2677                                                 bool enable)
2678 {
2679         uint32_t data, default_data;
2680
2681         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2682         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2683                              GFX_POWER_GATING_ENABLE,
2684                              enable ? 1 : 0);
2685         if (default_data != data)
2686                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2687 }
2688
2689 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2690                                                 bool enable)
2691 {
2692         uint32_t data, default_data;
2693
2694         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2695         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2696                              GFX_PIPELINE_PG_ENABLE,
2697                              enable ? 1 : 0);
2698         if (default_data != data)
2699                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2700
2701         if (!enable)
2702                 /* read any GFX register to wake up GFX */
2703                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2704 }
2705
2706 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2707                                                        bool enable)
2708 {
2709         uint32_t data, default_data;
2710
2711         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2712         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2713                              STATIC_PER_CU_PG_ENABLE,
2714                              enable ? 1 : 0);
2715         if (default_data != data)
2716                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2717 }
2718
2719 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2720                                                 bool enable)
2721 {
2722         uint32_t data, default_data;
2723
2724         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2725         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2726                              DYN_PER_CU_PG_ENABLE,
2727                              enable ? 1 : 0);
2728         if (default_data != data)
2729                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2730 }
2731
2732 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2733 {
2734         gfx_v9_0_init_csb(adev);
2735
2736         /*
2737          * The RLC save/restore list is supported since RLC v2_1,
2738          * and it is required by the gfxoff feature.
2739          */
2740         if (adev->gfx.rlc.is_rlc_v2_1) {
2741                 if (adev->asic_type == CHIP_VEGA12 ||
2742                     (adev->asic_type == CHIP_RAVEN &&
2743                      adev->rev_id >= 8))
2744                         gfx_v9_1_init_rlc_save_restore_list(adev);
2745                 gfx_v9_0_enable_save_restore_machine(adev);
2746         }
2747
2748         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2749                               AMD_PG_SUPPORT_GFX_SMG |
2750                               AMD_PG_SUPPORT_GFX_DMG |
2751                               AMD_PG_SUPPORT_CP |
2752                               AMD_PG_SUPPORT_GDS |
2753                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2754                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2755                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2756                 gfx_v9_0_init_gfx_power_gating(adev);
2757         }
2758 }
2759
2760 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2761 {
2762         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2763         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2764         gfx_v9_0_wait_for_rlc_serdes(adev);
2765 }
2766
2767 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2768 {
2769         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2770         udelay(50);
2771         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2772         udelay(50);
2773 }
2774
2775 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2776 {
2777 #ifdef AMDGPU_RLC_DEBUG_RETRY
2778         u32 rlc_ucode_ver;
2779 #endif
2780
2781         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2782         udelay(50);
2783
2784         /* APUs enable the CP interrupt later, after the CP has been initialized */
2785         if (!(adev->flags & AMD_IS_APU)) {
2786                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2787                 udelay(50);
2788         }
2789
2790 #ifdef AMDGPU_RLC_DEBUG_RETRY
2791         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2792         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2793         if (rlc_ucode_ver == 0x108) {
2794                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i \n",
2795                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2796                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2797                  * default is 0x9C4 to create a 100us interval */
2798                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2799                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2800                  * to disable the page fault retry interrupts, default is
2801                  * 0x100 (256) */
2802                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2803         }
2804 #endif
2805 }
2806
2807 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2808 {
2809         const struct rlc_firmware_header_v2_0 *hdr;
2810         const __le32 *fw_data;
2811         unsigned i, fw_size;
2812
2813         if (!adev->gfx.rlc_fw)
2814                 return -EINVAL;
2815
2816         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2817         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2818
2819         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2820                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2821         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2822
2823         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2824                         RLCG_UCODE_LOADING_START_ADDRESS);
2825         for (i = 0; i < fw_size; i++)
2826                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2827         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2828
2829         return 0;
2830 }
2831
2832 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2833 {
2834         int r;
2835
2836         if (amdgpu_sriov_vf(adev)) {
2837                 gfx_v9_0_init_csb(adev);
2838                 return 0;
2839         }
2840
2841         adev->gfx.rlc.funcs->stop(adev);
2842
2843         /* disable CG */
2844         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2845
2846         gfx_v9_0_init_pg(adev);
2847
2848         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2849                 /* legacy rlc firmware loading */
2850                 r = gfx_v9_0_rlc_load_microcode(adev);
2851                 if (r)
2852                         return r;
2853         }
2854
2855         switch (adev->asic_type) {
2856         case CHIP_RAVEN:
2857                 if (amdgpu_lbpw == 0)
2858                         gfx_v9_0_enable_lbpw(adev, false);
2859                 else
2860                         gfx_v9_0_enable_lbpw(adev, true);
2861                 break;
2862         case CHIP_VEGA20:
2863                 if (amdgpu_lbpw > 0)
2864                         gfx_v9_0_enable_lbpw(adev, true);
2865                 else
2866                         gfx_v9_0_enable_lbpw(adev, false);
2867                 break;
2868         default:
2869                 break;
2870         }
2871
2872         adev->gfx.rlc.funcs->start(adev);
2873
2874         return 0;
2875 }
2876
2877 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2878 {
2879         int i;
2880         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2881
2882         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2883         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2884         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2885         if (!enable) {
2886                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2887                         adev->gfx.gfx_ring[i].sched.ready = false;
2888         }
2889         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2890         udelay(50);
2891 }
2892
2893 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2894 {
2895         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2896         const struct gfx_firmware_header_v1_0 *ce_hdr;
2897         const struct gfx_firmware_header_v1_0 *me_hdr;
2898         const __le32 *fw_data;
2899         unsigned i, fw_size;
2900
2901         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2902                 return -EINVAL;
2903
2904         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2905                 adev->gfx.pfp_fw->data;
2906         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2907                 adev->gfx.ce_fw->data;
2908         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2909                 adev->gfx.me_fw->data;
2910
2911         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2912         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2913         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2914
2915         gfx_v9_0_cp_gfx_enable(adev, false);
2916
2917         /* PFP */
2918         fw_data = (const __le32 *)
2919                 (adev->gfx.pfp_fw->data +
2920                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2921         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2922         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2923         for (i = 0; i < fw_size; i++)
2924                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2925         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2926
2927         /* CE */
2928         fw_data = (const __le32 *)
2929                 (adev->gfx.ce_fw->data +
2930                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2931         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2932         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2933         for (i = 0; i < fw_size; i++)
2934                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2935         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2936
2937         /* ME */
2938         fw_data = (const __le32 *)
2939                 (adev->gfx.me_fw->data +
2940                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2941         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2942         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2943         for (i = 0; i < fw_size; i++)
2944                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2945         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2946
2947         return 0;
2948 }
2949
2950 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2951 {
2952         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2953         const struct cs_section_def *sect = NULL;
2954         const struct cs_extent_def *ext = NULL;
2955         int r, i, tmp;
2956
2957         /* init the CP */
2958         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2959         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2960
2961         gfx_v9_0_cp_gfx_enable(adev, true);
2962
2963         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2964         if (r) {
2965                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2966                 return r;
2967         }
2968
2969         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2970         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2971
2972         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2973         amdgpu_ring_write(ring, 0x80000000);
2974         amdgpu_ring_write(ring, 0x80000000);
2975
2976         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2977                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2978                         if (sect->id == SECT_CONTEXT) {
2979                                 amdgpu_ring_write(ring,
2980                                        PACKET3(PACKET3_SET_CONTEXT_REG,
2981                                                ext->reg_count));
2982                                 amdgpu_ring_write(ring,
2983                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2984                                 for (i = 0; i < ext->reg_count; i++)
2985                                         amdgpu_ring_write(ring, ext->extent[i]);
2986                         }
2987                 }
2988         }
2989
2990         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2991         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2992
2993         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2994         amdgpu_ring_write(ring, 0);
2995
2996         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2997         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2998         amdgpu_ring_write(ring, 0x8000);
2999         amdgpu_ring_write(ring, 0x8000);
3000
3001         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3002         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3003                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3004         amdgpu_ring_write(ring, tmp);
3005         amdgpu_ring_write(ring, 0);
3006
3007         amdgpu_ring_commit(ring);
3008
3009         return 0;
3010 }
3011
3012 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3013 {
3014         struct amdgpu_ring *ring;
3015         u32 tmp;
3016         u32 rb_bufsz;
3017         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3018
3019         /* Set the write pointer delay */
3020         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3021
3022         /* set the RB to use vmid 0 */
3023         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3024
3025         /* Set ring buffer size */
3026         ring = &adev->gfx.gfx_ring[0];
3027         rb_bufsz = order_base_2(ring->ring_size / 8);
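        /*
         * ring_size is in bytes, so ring_size / 8 is the size in qwords and
         * rb_bufsz is log2(size in dwords) - 1, i.e. the ring holds
         * 2^(RB_BUFSZ + 1) dwords (the same encoding the compute queues use).
         */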
3028         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3029         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3030 #ifdef __BIG_ENDIAN
3031         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3032 #endif
3033         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3034
3035         /* Initialize the ring buffer's write pointers */
3036         ring->wptr = 0;
3037         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3038         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3039
3040         /* set the wb address whether it's enabled or not */
3041         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3042         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3043         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3044
3045         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3046         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3047         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3048
3049         mdelay(1);
3050         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3051
3052         rb_addr = ring->gpu_addr >> 8;
3053         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3054         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3055
3056         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3057         if (ring->use_doorbell) {
3058                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3059                                     DOORBELL_OFFSET, ring->doorbell_index);
3060                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3061                                     DOORBELL_EN, 1);
3062         } else {
3063                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3064         }
3065         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3066
3067         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3068                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3069         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3070
3071         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3072                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3073
3074
3075         /* start the ring */
3076         gfx_v9_0_cp_gfx_start(adev);
3077         ring->sched.ready = true;
3078
3079         return 0;
3080 }
3081
3082 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3083 {
3084         int i;
3085
3086         if (enable) {
3087                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3088         } else {
3089                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3090                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3091                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3092                         adev->gfx.compute_ring[i].sched.ready = false;
3093                 adev->gfx.kiq.ring.sched.ready = false;
3094         }
3095         udelay(50);
3096 }
3097
3098 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3099 {
3100         const struct gfx_firmware_header_v1_0 *mec_hdr;
3101         const __le32 *fw_data;
3102         unsigned i;
3103         u32 tmp;
3104
3105         if (!adev->gfx.mec_fw)
3106                 return -EINVAL;
3107
3108         gfx_v9_0_cp_compute_enable(adev, false);
3109
3110         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3111         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3112
3113         fw_data = (const __le32 *)
3114                 (adev->gfx.mec_fw->data +
3115                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3116         tmp = 0;
3117         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3118         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3119         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3120
3121         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3122                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3123         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3124                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3125
3126         /* MEC1 */
3127         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3128                          mec_hdr->jt_offset);
3129         for (i = 0; i < mec_hdr->jt_size; i++)
3130                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3131                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3132
3133         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3134                         adev->gfx.mec_fw_version);
3135         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run a different microcode than MEC1 */
3136
3137         return 0;
3138 }
3139
3140 /* KIQ functions */
3141 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3142 {
3143         uint32_t tmp;
3144         struct amdgpu_device *adev = ring->adev;
3145
3146         /* tell RLC which is KIQ queue */
3147         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3148         tmp &= 0xffffff00;
3149         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
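        /*
         * RLC_CP_SCHEDULERS[7:0]: queue in bits 2:0, pipe in bits 4:3, ME
         * starting at bit 5.  Bit 7 (0x80), set by the second write below,
         * appears to mark the KIQ entry as valid.
         */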
3150         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3151         tmp |= 0x80;
3152         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3153 }
3154
3155 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3156 {
3157         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3158         uint64_t queue_mask = 0;
3159         int r, i;
3160
3161         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3162                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3163                         continue;
3164
3165                 /* This situation may be hit in the future if a new HW
3166                  * generation exposes more than 64 queues. If so, the
3167                  * definition of queue_mask needs updating */
3168                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3169                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3170                         break;
3171                 }
3172
3173                 queue_mask |= (1ull << i);
3174         }
3175
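        /*
         * Ring budget: 8 dwords for the SET_RESOURCES packet (header plus 7
         * payload dwords) and a 7-dword MAP_QUEUES packet (header plus 6
         * payload dwords) for each compute ring.
         */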
3176         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3177         if (r) {
3178                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3179                 return r;
3180         }
3181
3182         /* set resources */
3183         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3184         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3185                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
3186         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
3187         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
3188         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
3189         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
3190         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
3191         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
3192         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3193                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3194                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3195                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3196
3197                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3198                 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
3199                 amdgpu_ring_write(kiq_ring,
3200                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3201                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3202                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3203                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3204                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3205                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3206                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3207                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3208                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3209                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3210                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3211                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3212                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3213                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3214         }
3215
3216         r = amdgpu_ring_test_helper(kiq_ring);
3217         if (r)
3218                 DRM_ERROR("KCQ enable failed\n");
3219
3220         return r;
3221 }
3222
3223 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3224 {
3225         struct amdgpu_device *adev = ring->adev;
3226         struct v9_mqd *mqd = ring->mqd_ptr;
3227         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3228         uint32_t tmp;
3229
3230         mqd->header = 0xC0310800;
3231         mqd->compute_pipelinestat_enable = 0x00000001;
3232         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3233         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3234         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3235         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3236         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3237         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3238         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3239         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3240         mqd->compute_misc_reserved = 0x00000003;
3241
3242         mqd->dynamic_cu_mask_addr_lo =
3243                 lower_32_bits(ring->mqd_gpu_addr
3244                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3245         mqd->dynamic_cu_mask_addr_hi =
3246                 upper_32_bits(ring->mqd_gpu_addr
3247                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3248
3249         eop_base_addr = ring->eop_gpu_addr >> 8;
3250         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3251         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3252
3253         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3254         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3255         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3256                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
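        /*
         * With GFX9_MEC_HPD_SIZE = 4096 bytes (1024 dwords),
         * order_base_2(1024) - 1 = 9 and 2^(9 + 1) = 1024 dwords, i.e. the
         * EOP buffer covers the whole HPD allocation.
         */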
3257
3258         mqd->cp_hqd_eop_control = tmp;
3259
3260         /* enable doorbell? */
3261         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3262
3263         if (ring->use_doorbell) {
3264                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3265                                     DOORBELL_OFFSET, ring->doorbell_index);
3266                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3267                                     DOORBELL_EN, 1);
3268                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3269                                     DOORBELL_SOURCE, 0);
3270                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3271                                     DOORBELL_HIT, 0);
3272         } else {
3273                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3274                                          DOORBELL_EN, 0);
3275         }
3276
3277         mqd->cp_hqd_pq_doorbell_control = tmp;
3278
3279         /* disable the queue if it's active */
3280         ring->wptr = 0;
3281         mqd->cp_hqd_dequeue_request = 0;
3282         mqd->cp_hqd_pq_rptr = 0;
3283         mqd->cp_hqd_pq_wptr_lo = 0;
3284         mqd->cp_hqd_pq_wptr_hi = 0;
3285
3286         /* set the pointer to the MQD */
3287         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3288         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3289
3290         /* set MQD vmid to 0 */
3291         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3292         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3293         mqd->cp_mqd_control = tmp;
3294
3295         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3296         hqd_gpu_addr = ring->gpu_addr >> 8;
3297         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3298         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3299
3300         /* set up the HQD, this is similar to CP_RB0_CNTL */
3301         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3302         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3303                             (order_base_2(ring->ring_size / 4) - 1));
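        /*
         * QUEUE_SIZE uses the same 2^(n + 1)-dword encoding as EOP_SIZE:
         * e.g. a 64KB ring gives order_base_2(16384) - 1 = 13 and
         * 2^(13 + 1) = 16384 dwords.
         */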
3304         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3305                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3306 #ifdef __BIG_ENDIAN
3307         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3308 #endif
3309         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3310         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3311         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3312         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3313         mqd->cp_hqd_pq_control = tmp;
3314
3315         /* set the wb address whether it's enabled or not */
3316         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3317         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3318         mqd->cp_hqd_pq_rptr_report_addr_hi =
3319                 upper_32_bits(wb_gpu_addr) & 0xffff;
3320
3321         /* only used if CP_PQ_WPTR_POLL_CNTL.EN is set */
3322         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3323         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3324         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3325
3326         tmp = 0;
3327         /* enable the doorbell if requested */
3328         if (ring->use_doorbell) {
3329                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3330                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3331                                 DOORBELL_OFFSET, ring->doorbell_index);
3332
3333                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3334                                          DOORBELL_EN, 1);
3335                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3336                                          DOORBELL_SOURCE, 0);
3337                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3338                                          DOORBELL_HIT, 0);
3339         }
3340
3341         mqd->cp_hqd_pq_doorbell_control = tmp;
3342
3343         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3344         ring->wptr = 0;
3345         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3346
3347         /* set the vmid for the queue */
3348         mqd->cp_hqd_vmid = 0;
3349
3350         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3351         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3352         mqd->cp_hqd_persistent_state = tmp;
3353
3354         /* set MIN_IB_AVAIL_SIZE */
3355         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3356         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3357         mqd->cp_hqd_ib_control = tmp;
3358
3359         /* activate the queue */
3360         mqd->cp_hqd_active = 1;
3361
3362         return 0;
3363 }
3364
3365 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3366 {
3367         struct amdgpu_device *adev = ring->adev;
3368         struct v9_mqd *mqd = ring->mqd_ptr;
3369         int j;
3370
3371         /* disable wptr polling */
3372         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3373
3374         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3375                mqd->cp_hqd_eop_base_addr_lo);
3376         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3377                mqd->cp_hqd_eop_base_addr_hi);
3378
3379         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3380         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3381                mqd->cp_hqd_eop_control);
3382
3383         /* enable doorbell? */
3384         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3385                mqd->cp_hqd_pq_doorbell_control);
3386
3387         /* disable the queue if it's active */
3388         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3389                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3390                 for (j = 0; j < adev->usec_timeout; j++) {
3391                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3392                                 break;
3393                         udelay(1);
3394                 }
3395                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3396                        mqd->cp_hqd_dequeue_request);
3397                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3398                        mqd->cp_hqd_pq_rptr);
3399                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3400                        mqd->cp_hqd_pq_wptr_lo);
3401                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3402                        mqd->cp_hqd_pq_wptr_hi);
3403         }
3404
3405         /* set the pointer to the MQD */
3406         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3407                mqd->cp_mqd_base_addr_lo);
3408         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3409                mqd->cp_mqd_base_addr_hi);
3410
3411         /* set MQD vmid to 0 */
3412         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3413                mqd->cp_mqd_control);
3414
3415         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3416         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3417                mqd->cp_hqd_pq_base_lo);
3418         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3419                mqd->cp_hqd_pq_base_hi);
3420
3421         /* set up the HQD, this is similar to CP_RB0_CNTL */
3422         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3423                mqd->cp_hqd_pq_control);
3424
3425         /* set the wb address whether it's enabled or not */
3426         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3427                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3428         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3429                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3430
3431         /* only used if CP_PQ_WPTR_POLL_CNTL.EN is set */
3432         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3433                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3434         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3435                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3436
3437         /* enable the doorbell if requested */
3438         if (ring->use_doorbell) {
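                /*
                 * doorbell_index values are in 64-bit doorbell units; the
                 * "* 2" converts to a 32-bit dword index and the "<< 2" to
                 * the byte offset these range registers appear to expect.
                 */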
3439                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3440                                         (adev->doorbell_index.kiq * 2) << 2);
3441                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3442                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3443         }
3444
3445         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3446                mqd->cp_hqd_pq_doorbell_control);
3447
3448         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3449         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3450                mqd->cp_hqd_pq_wptr_lo);
3451         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3452                mqd->cp_hqd_pq_wptr_hi);
3453
3454         /* set the vmid for the queue */
3455         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3456
3457         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3458                mqd->cp_hqd_persistent_state);
3459
3460         /* activate the queue */
3461         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3462                mqd->cp_hqd_active);
3463
3464         if (ring->use_doorbell)
3465                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3466
3467         return 0;
3468 }
3469
3470 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3471 {
3472         struct amdgpu_device *adev = ring->adev;
3473         int j;
3474
3475         /* disable the queue if it's active */
3476         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3477
3478                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3479
3480                 for (j = 0; j < adev->usec_timeout; j++) {
3481                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3482                                 break;
3483                         udelay(1);
3484                 }
3485
3486                 if (j == adev->usec_timeout) {
3487                         DRM_DEBUG("KIQ dequeue request failed.\n");
3488
3489                         /* Manual disable if dequeue request times out */
3490                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3491                 }
3492
3493                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3494                       0);
3495         }
3496
3497         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3498         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3499         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3500         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3501         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3502         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3503         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3504         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3505
3506         return 0;
3507 }
3508
3509 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3510 {
3511         struct amdgpu_device *adev = ring->adev;
3512         struct v9_mqd *mqd = ring->mqd_ptr;
3513         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
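        /* mqd_backup[] holds one slot per compute ring plus a trailing slot
         * for the KIQ, which is why the KIQ uses AMDGPU_MAX_COMPUTE_RINGS here
         */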
3514
3515         gfx_v9_0_kiq_setting(ring);
3516
3517         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3518                 /* reset MQD to a clean status */
3519                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3520                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3521
3522                 /* reset ring buffer */
3523                 ring->wptr = 0;
3524                 amdgpu_ring_clear_ring(ring);
3525
3526                 mutex_lock(&adev->srbm_mutex);
3527                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3528                 gfx_v9_0_kiq_init_register(ring);
3529                 soc15_grbm_select(adev, 0, 0, 0, 0);
3530                 mutex_unlock(&adev->srbm_mutex);
3531         } else {
3532                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3533                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3534                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3535                 mutex_lock(&adev->srbm_mutex);
3536                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3537                 gfx_v9_0_mqd_init(ring);
3538                 gfx_v9_0_kiq_init_register(ring);
3539                 soc15_grbm_select(adev, 0, 0, 0, 0);
3540                 mutex_unlock(&adev->srbm_mutex);
3541
3542                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3543                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3544         }
3545
3546         return 0;
3547 }
3548
3549 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3550 {
3551         struct amdgpu_device *adev = ring->adev;
3552         struct v9_mqd *mqd = ring->mqd_ptr;
3553         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3554
3555         if (!adev->in_gpu_reset && !adev->in_suspend) {
3556                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3557                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3558                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3559                 mutex_lock(&adev->srbm_mutex);
3560                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3561                 gfx_v9_0_mqd_init(ring);
3562                 soc15_grbm_select(adev, 0, 0, 0, 0);
3563                 mutex_unlock(&adev->srbm_mutex);
3564
3565                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3566                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3567         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3568                 /* reset MQD to a clean status */
3569                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3570                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3571
3572                 /* reset ring buffer */
3573                 ring->wptr = 0;
3574                 amdgpu_ring_clear_ring(ring);
3575         } else {
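                /* coming back from suspend: keep the previously initialized
                 * MQD and just drop any stale ring contents
                 */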
3576                 amdgpu_ring_clear_ring(ring);
3577         }
3578
3579         return 0;
3580 }
3581
3582 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3583 {
3584         struct amdgpu_ring *ring;
3585         int r;
3586
3587         ring = &adev->gfx.kiq.ring;
3588
3589         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3590         if (unlikely(r != 0))
3591                 return r;
3592
3593         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3594         if (unlikely(r != 0))
3595                 return r;
3596
3597         gfx_v9_0_kiq_init_queue(ring);
3598         amdgpu_bo_kunmap(ring->mqd_obj);
3599         ring->mqd_ptr = NULL;
3600         amdgpu_bo_unreserve(ring->mqd_obj);
3601         ring->sched.ready = true;
3602         return 0;
3603 }
3604
3605 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3606 {
3607         struct amdgpu_ring *ring = NULL;
3608         int r = 0, i;
3609
3610         gfx_v9_0_cp_compute_enable(adev, true);
3611
3612         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3613                 ring = &adev->gfx.compute_ring[i];
3614
3615                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3616                 if (unlikely(r != 0))
3617                         goto done;
3618                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3619                 if (!r) {
3620                         r = gfx_v9_0_kcq_init_queue(ring);
3621                         amdgpu_bo_kunmap(ring->mqd_obj);
3622                         ring->mqd_ptr = NULL;
3623                 }
3624                 amdgpu_bo_unreserve(ring->mqd_obj);
3625                 if (r)
3626                         goto done;
3627         }
3628
3629         r = gfx_v9_0_kiq_kcq_enable(adev);
3630 done:
3631         return r;
3632 }
3633
3634 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3635 {
3636         int r, i;
3637         struct amdgpu_ring *ring;
3638
3639         if (!(adev->flags & AMD_IS_APU))
3640                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3641
3642         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3643                 if (adev->asic_type != CHIP_ARCTURUS) {
3644                         /* legacy firmware loading */
3645                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3646                         if (r)
3647                                 return r;
3648                 }
3649
3650                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3651                 if (r)
3652                         return r;
3653         }
3654
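        /* bring up the KIQ first; the compute queues below are mapped by
         * submitting packets through it (see gfx_v9_0_kiq_kcq_enable)
         */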
3655         r = gfx_v9_0_kiq_resume(adev);
3656         if (r)
3657                 return r;
3658
3659         if (adev->asic_type != CHIP_ARCTURUS) {
3660                 r = gfx_v9_0_cp_gfx_resume(adev);
3661                 if (r)
3662                         return r;
3663         }
3664
3665         r = gfx_v9_0_kcq_resume(adev);
3666         if (r)
3667                 return r;
3668
3669         if (adev->asic_type != CHIP_ARCTURUS) {
3670                 ring = &adev->gfx.gfx_ring[0];
3671                 r = amdgpu_ring_test_helper(ring);
3672                 if (r)
3673                         return r;
3674         }
3675
3676         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3677                 ring = &adev->gfx.compute_ring[i];
3678                 amdgpu_ring_test_helper(ring);
3679         }
3680
3681         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3682
3683         return 0;
3684 }
3685
3686 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3687 {
3688         if (adev->asic_type != CHIP_ARCTURUS)
3689                 gfx_v9_0_cp_gfx_enable(adev, enable);
3690         gfx_v9_0_cp_compute_enable(adev, enable);
3691 }
3692
3693 static int gfx_v9_0_hw_init(void *handle)
3694 {
3695         int r;
3696         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3697
3698         if (!amdgpu_sriov_vf(adev))
3699                 gfx_v9_0_init_golden_registers(adev);
3700
3701         gfx_v9_0_constants_init(adev);
3702
3703         r = gfx_v9_0_csb_vram_pin(adev);
3704         if (r)
3705                 return r;
3706
3707         r = adev->gfx.rlc.funcs->resume(adev);
3708         if (r)
3709                 return r;
3710
3711         r = gfx_v9_0_cp_resume(adev);
3712         if (r)
3713                 return r;
3714
3715         return r;
3716 }
3717
3718 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3719 {
3720         int r, i;
3721         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3722
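        /* each UNMAP_QUEUES request emitted below is 6 dwords (one packet
         * header plus five payload dwords), hence 6 * num_compute_rings
         */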
3723         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3724         if (r)
3725                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3726
3727         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3728                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3729
3730                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3731                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3732                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3733                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3734                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3735                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3736                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3737                 amdgpu_ring_write(kiq_ring, 0);
3738                 amdgpu_ring_write(kiq_ring, 0);
3739                 amdgpu_ring_write(kiq_ring, 0);
3740         }
3741         r = amdgpu_ring_test_helper(kiq_ring);
3742         if (r)
3743                 DRM_ERROR("KCQ disable failed\n");
3744
3745         return r;
3746 }
3747
3748 static int gfx_v9_0_hw_fini(void *handle)
3749 {
3750         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3751
3752         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3753         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3754         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3755
3756         /* if a RAS fatal error has triggered, DF freeze and KCQ disable would fail, so skip them */
3757         if (!amdgpu_ras_intr_triggered())
3758                 /* disable KCQ to avoid CPC touching memory that is no longer valid */
3759                 gfx_v9_0_kcq_disable(adev);
3760
3761         if (amdgpu_sriov_vf(adev)) {
3762                 gfx_v9_0_cp_gfx_enable(adev, false);
3763                 /* polling must be disabled for SRIOV when the hw is finished,
3764                  * otherwise the CPC engine may keep fetching a WB address that
3765                  * is no longer valid after sw teardown and trigger a DMAR read
3766                  * error on the hypervisor side.
3767                  */
3768                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3769                 return 0;
3770         }
3771
3772         /* Use the CAIL deinitialize sequence when unbinding the device from the
3773          * driver, otherwise the KIQ hangs when it is bound back.
3774          */
3775         if (!adev->in_gpu_reset && !adev->in_suspend) {
3776                 mutex_lock(&adev->srbm_mutex);
3777                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3778                                 adev->gfx.kiq.ring.pipe,
3779                                 adev->gfx.kiq.ring.queue, 0);
3780                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3781                 soc15_grbm_select(adev, 0, 0, 0, 0);
3782                 mutex_unlock(&adev->srbm_mutex);
3783         }
3784
3785         gfx_v9_0_cp_enable(adev, false);
3786         adev->gfx.rlc.funcs->stop(adev);
3787
3788         gfx_v9_0_csb_vram_unpin(adev);
3789
3790         return 0;
3791 }
3792
3793 static int gfx_v9_0_suspend(void *handle)
3794 {
3795         return gfx_v9_0_hw_fini(handle);
3796 }
3797
3798 static int gfx_v9_0_resume(void *handle)
3799 {
3800         return gfx_v9_0_hw_init(handle);
3801 }
3802
3803 static bool gfx_v9_0_is_idle(void *handle)
3804 {
3805         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3806
3807         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3808                                 GRBM_STATUS, GUI_ACTIVE))
3809                 return false;
3810         else
3811                 return true;
3812 }
3813
3814 static int gfx_v9_0_wait_for_idle(void *handle)
3815 {
3816         unsigned i;
3817         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3818
3819         for (i = 0; i < adev->usec_timeout; i++) {
3820                 if (gfx_v9_0_is_idle(handle))
3821                         return 0;
3822                 udelay(1);
3823         }
3824         return -ETIMEDOUT;
3825 }
3826
3827 static int gfx_v9_0_soft_reset(void *handle)
3828 {
3829         u32 grbm_soft_reset = 0;
3830         u32 tmp;
3831         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3832
3833         /* GRBM_STATUS */
3834         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3835         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3836                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3837                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3838                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3839                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3840                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3841                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3842                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3843                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3844                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3845         }
3846
3847         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3848                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3849                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3850         }
3851
3852         /* GRBM_STATUS2 */
3853         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3854         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3855                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3856                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3857
3858
3859         if (grbm_soft_reset) {
3860                 /* stop the rlc */
3861                 adev->gfx.rlc.funcs->stop(adev);
3862
3863                 if (adev->asic_type != CHIP_ARCTURUS)
3864                         /* Disable GFX parsing/prefetching */
3865                         gfx_v9_0_cp_gfx_enable(adev, false);
3866
3867                 /* Disable MEC parsing/prefetching */
3868                 gfx_v9_0_cp_compute_enable(adev, false);
3869
3870                 if (grbm_soft_reset) {
3871                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3872                         tmp |= grbm_soft_reset;
3873                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3874                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3875                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3876
3877                         udelay(50);
3878
3879                         tmp &= ~grbm_soft_reset;
3880                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3881                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3882                 }
3883
3884                 /* Wait a little for things to settle down */
3885                 udelay(50);
3886         }
3887         return 0;
3888 }
3889
3890 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3891 {
3892         uint64_t clock;
3893
3894         mutex_lock(&adev->gfx.gpu_clock_mutex);
3895         if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
3896                 uint32_t tmp, lsb, msb, i = 0;
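                /* read MSB, LSB, then MSB again and retry if the high word
                 * changed, so a rollover between the two reads cannot corrupt
                 * the returned 64-bit value
                 */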
3897                 do {
3898                         if (i != 0)
3899                                 udelay(1);
3900                         tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3901                         lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB);
3902                         msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB);
3903                         i++;
3904                 } while (unlikely(tmp != msb) && (i < adev->usec_timeout));
3905                 clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL);
3906         } else {
3907                 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3908                 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3909                         ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3910         }
3911         mutex_unlock(&adev->gfx.gpu_clock_mutex);
3912         return clock;
3913 }
3914
3915 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3916                                           uint32_t vmid,
3917                                           uint32_t gds_base, uint32_t gds_size,
3918                                           uint32_t gws_base, uint32_t gws_size,
3919                                           uint32_t oa_base, uint32_t oa_size)
3920 {
3921         struct amdgpu_device *adev = ring->adev;
3922
3923         /* GDS Base */
3924         gfx_v9_0_write_data_to_reg(ring, 0, false,
3925                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3926                                    gds_base);
3927
3928         /* GDS Size */
3929         gfx_v9_0_write_data_to_reg(ring, 0, false,
3930                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3931                                    gds_size);
3932
3933         /* GWS */
3934         gfx_v9_0_write_data_to_reg(ring, 0, false,
3935                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3936                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3937
3938         /* OA */
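        /* (1 << (oa_size + oa_base)) - (1 << oa_base) builds a contiguous
         * mask of oa_size bits starting at bit oa_base
         */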
3939         gfx_v9_0_write_data_to_reg(ring, 0, false,
3940                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3941                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
3942 }
3943
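/* Pre-assembled GFX9 compute shaders used by the EDC GPR work-arounds below.
 * Judging from the dispatch setup (all CUs enabled, maximum VGPR/SGPR
 * allocation), they appear to touch every VGPR/SGPR so the ECC state of the
 * register files starts out initialized.
 */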
3944 static const u32 vgpr_init_compute_shader[] =
3945 {
3946         0xb07c0000, 0xbe8000ff,
3947         0x000000f8, 0xbf110800,
3948         0x7e000280, 0x7e020280,
3949         0x7e040280, 0x7e060280,
3950         0x7e080280, 0x7e0a0280,
3951         0x7e0c0280, 0x7e0e0280,
3952         0x80808800, 0xbe803200,
3953         0xbf84fff5, 0xbf9c0000,
3954         0xd28c0001, 0x0001007f,
3955         0xd28d0001, 0x0002027e,
3956         0x10020288, 0xb8810904,
3957         0xb7814000, 0xd1196a01,
3958         0x00000301, 0xbe800087,
3959         0xbefc00c1, 0xd89c4000,
3960         0x00020201, 0xd89cc080,
3961         0x00040401, 0x320202ff,
3962         0x00000800, 0x80808100,
3963         0xbf84fff8, 0x7e020280,
3964         0xbf810000, 0x00000000,
3965 };
3966
3967 static const u32 sgpr_init_compute_shader[] =
3968 {
3969         0xb07c0000, 0xbe8000ff,
3970         0x0000005f, 0xbee50080,
3971         0xbe812c65, 0xbe822c65,
3972         0xbe832c65, 0xbe842c65,
3973         0xbe852c65, 0xb77c0005,
3974         0x80808500, 0xbf84fff8,
3975         0xbe800080, 0xbf810000,
3976 };
3977
3978 static const struct soc15_reg_entry vgpr_init_regs[] = {
3979    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3980    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3981    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3982    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3983    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3984    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3985    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3986    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3987    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3988    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3989 };
3990
3991 static const struct soc15_reg_entry sgpr_init_regs[] = {
3992    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3993    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3994    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3995    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3996    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3997    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3998    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3999    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4000    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
4001    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4002 };
4003
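/* EDC/ECC counter registers read back (and thereby cleared) after the
 * work-around dispatches; the last two fields give the shader-engine and
 * instance counts iterated by the read-back loop in
 * gfx_v9_0_do_edc_gpr_workarounds().
 */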
4004 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4005    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4006    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4007    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4008    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4009    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4010    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4011    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4012    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4013    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4014    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4015    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4016    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4017    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4018    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4019    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4020    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4021    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4022    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4023    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4024    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4025    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4026    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4027    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4028    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4029    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4030    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4031    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4032    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4033    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4034    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4035    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4036    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4037    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4038 };
4039
4040 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4041 {
4042         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4043         int i, r;
4044
4045         /* only supported when RAS is enabled */
4046         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4047                 return 0;
4048
4049         r = amdgpu_ring_alloc(ring, 7);
4050         if (r) {
4051                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4052                         ring->name, r);
4053                 return r;
4054         }
4055
4056         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4057         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4058
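        /* the DMA_DATA packet below appears to fill the whole GDS aperture
         * with zeroes (DST_SEL(1) selects GDS, SRC_SEL(2) selects the
         * immediate data dword, which is 0), clearing any stale EDC state
         */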
4059         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4060         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4061                                 PACKET3_DMA_DATA_DST_SEL(1) |
4062                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4063                                 PACKET3_DMA_DATA_ENGINE(0)));
4064         amdgpu_ring_write(ring, 0);
4065         amdgpu_ring_write(ring, 0);
4066         amdgpu_ring_write(ring, 0);
4067         amdgpu_ring_write(ring, 0);
4068         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4069                                 adev->gds.gds_size);
4070
4071         amdgpu_ring_commit(ring);
4072
4073         for (i = 0; i < adev->usec_timeout; i++) {
4074                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4075                         break;
4076                 udelay(1);
4077         }
4078
4079         if (i >= adev->usec_timeout)
4080                 r = -ETIMEDOUT;
4081
4082         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4083
4084         return r;
4085 }
4086
4087 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4088 {
4089         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4090         struct amdgpu_ib ib;
4091         struct dma_fence *f = NULL;
4092         int r, i, j, k;
4093         unsigned total_size, vgpr_offset, sgpr_offset;
4094         u64 gpu_addr;
4095
4096         /* only supported when RAS is enabled */
4097         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4098                 return 0;
4099
4100         /* bail if the compute ring is not ready */
4101         if (!ring->sched.ready)
4102                 return 0;
4103
4104         total_size =
4105                 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4106         total_size +=
4107                 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4108         total_size = ALIGN(total_size, 256);
4109         vgpr_offset = total_size;
4110         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4111         sgpr_offset = total_size;
4112         total_size += sizeof(sgpr_init_compute_shader);
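        /* IB layout: per dispatch, 3 dwords per SET_SH_REG register write,
         * 4 for the COMPUTE_PGM_LO/HI pair, 5 for DISPATCH_DIRECT and 2 for
         * the CS partial flush; the two shader binaries follow at 256-byte
         * aligned offsets
         */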
4113
4114         /* allocate an indirect buffer to put the commands in */
4115         memset(&ib, 0, sizeof(ib));
4116         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4117         if (r) {
4118                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4119                 return r;
4120         }
4121
4122         /* load the compute shaders */
4123         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4124                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4125
4126         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4127                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4128
4129         /* init the ib length to 0 */
4130         ib.length_dw = 0;
4131
4132         /* VGPR */
4133         /* write the register state for the compute dispatch */
4134         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4135                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4136                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4137                                                                 - PACKET3_SET_SH_REG_START;
4138                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4139         }
4140         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4141         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4142         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4143         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4144                                                         - PACKET3_SET_SH_REG_START;
4145         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4146         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4147
4148         /* write dispatch packet */
4149         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4150         ib.ptr[ib.length_dw++] = 128; /* x */
4151         ib.ptr[ib.length_dw++] = 1; /* y */
4152         ib.ptr[ib.length_dw++] = 1; /* z */
4153         ib.ptr[ib.length_dw++] =
4154                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4155
4156         /* write CS partial flush packet */
4157         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4158         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4159
4160         /* SGPR */
4161         /* write the register state for the compute dispatch */
4162         for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4163                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4164                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4165                                                                 - PACKET3_SET_SH_REG_START;
4166                 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4167         }
4168         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4169         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4170         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4171         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4172                                                         - PACKET3_SET_SH_REG_START;
4173         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4174         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4175
4176         /* write dispatch packet */
4177         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4178         ib.ptr[ib.length_dw++] = 128; /* x */
4179         ib.ptr[ib.length_dw++] = 1; /* y */
4180         ib.ptr[ib.length_dw++] = 1; /* z */
4181         ib.ptr[ib.length_dw++] =
4182                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4183
4184         /* write CS partial flush packet */
4185         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4186         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4187
4188         /* schedule the ib on the ring */
4189         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4190         if (r) {
4191                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4192                 goto fail;
4193         }
4194
4195         /* wait for the GPU to finish processing the IB */
4196         r = dma_fence_wait(f, false);
4197         if (r) {
4198                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4199                 goto fail;
4200         }
4201
4202         /* read back registers to clear the counters */
4203         mutex_lock(&adev->grbm_idx_mutex);
4204         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4205                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4206                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4207                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4208                                 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4209                         }
4210                 }
4211         }
4212         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4213         mutex_unlock(&adev->grbm_idx_mutex);
4214
4215 fail:
4216         amdgpu_ib_free(adev, &ib, NULL);
4217         dma_fence_put(f);
4218
4219         return r;
4220 }
4221
4222 static int gfx_v9_0_early_init(void *handle)
4223 {
4224         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4225
4226         if (adev->asic_type == CHIP_ARCTURUS)
4227                 adev->gfx.num_gfx_rings = 0;
4228         else
4229                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4230         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4231         gfx_v9_0_set_ring_funcs(adev);
4232         gfx_v9_0_set_irq_funcs(adev);
4233         gfx_v9_0_set_gds_init(adev);
4234         gfx_v9_0_set_rlc_funcs(adev);
4235
4236         return 0;
4237 }
4238
4239 static int gfx_v9_0_ecc_late_init(void *handle)
4240 {
4241         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4242         int r;
4243
4244         r = amdgpu_gfx_ras_late_init(adev);
4245         if (r)
4246                 return r;
4247
4248         r = gfx_v9_0_do_edc_gds_workarounds(adev);
4249         if (r)
4250                 return r;
4251
4252         /* requires IBs so do in late init after IB pool is initialized */
4253         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4254         if (r)
4255                 return r;
4256
4257         return 0;
4258 }
4259
4260 static int gfx_v9_0_late_init(void *handle)
4261 {
4262         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4263         int r;
4264
4265         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4266         if (r)
4267                 return r;
4268
4269         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4270         if (r)
4271                 return r;
4272
4273         r = gfx_v9_0_ecc_late_init(handle);
4274         if (r)
4275                 return r;
4276
4277         return 0;
4278 }
4279
4280 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4281 {
4282         uint32_t rlc_setting;
4283
4284         /* if RLC is not enabled, do nothing */
4285         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4286         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4287                 return false;
4288
4289         return true;
4290 }
4291
4292 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4293 {
4294         uint32_t data;
4295         unsigned i;
4296
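        /* request safe mode: CMD together with MESSAGE = 1 appears to ask
         * the RLC firmware to enter safe mode; the RLC clears the CMD bit
         * once the request is taken, which is what the loop below waits for
         */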
4297         data = RLC_SAFE_MODE__CMD_MASK;
4298         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4299         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4300
4301         /* wait for RLC_SAFE_MODE */
4302         for (i = 0; i < adev->usec_timeout; i++) {
4303                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4304                         break;
4305                 udelay(1);
4306         }
4307 }
4308
4309 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4310 {
4311         uint32_t data;
4312
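        /* CMD with the MESSAGE field left at 0 appears to request leaving safe mode */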
4313         data = RLC_SAFE_MODE__CMD_MASK;
4314         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4315 }
4316
4317 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4318                                                 bool enable)
4319 {
4320         amdgpu_gfx_rlc_enter_safe_mode(adev);
4321
4322         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4323                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4324                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4325                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4326         } else {
4327                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4328                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4329                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4330         }
4331
4332         amdgpu_gfx_rlc_exit_safe_mode(adev);
4333 }
4334
4335 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4336                                                 bool enable)
4337 {
4338         /* TODO: double check if we need to perform under safe mode */
4339         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4340
4341         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4342                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4343         else
4344                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4345
4346         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4347                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4348         else
4349                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4350
4351         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4352 }
4353
4354 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4355                                                       bool enable)
4356 {
4357         uint32_t data, def;
4358
4359         amdgpu_gfx_rlc_enter_safe_mode(adev);
4360
4361         /* It is disabled by HW by default */
4362         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4363                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4364                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4365
4366                 if (adev->asic_type != CHIP_VEGA12)
4367                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4368
4369                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4370                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4371                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4372
4373                 /* only for Vega10 & Raven1 */
4374                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4375
4376                 if (def != data)
4377                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4378
4379                 /* MGLS is a global flag to control all MGLS in GFX */
4380                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4381                         /* 2 - RLC memory Light sleep */
4382                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4383                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4384                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4385                                 if (def != data)
4386                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4387                         }
4388                         /* 3 - CP memory Light sleep */
4389                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4390                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4391                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4392                                 if (def != data)
4393                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4394                         }
4395                 }
4396         } else {
4397                 /* 1 - MGCG_OVERRIDE */
4398                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4399
4400                 if (adev->asic_type != CHIP_VEGA12)
4401                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4402
4403                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4404                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4405                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4406                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4407
4408                 if (def != data)
4409                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4410
4411                 /* 2 - disable MGLS in RLC */
4412                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4413                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4414                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4415                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4416                 }
4417
4418                 /* 3 - disable MGLS in CP */
4419                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4420                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4421                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4422                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4423                 }
4424         }
4425
4426         amdgpu_gfx_rlc_exit_safe_mode(adev);
4427 }
4428
4429 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4430                                            bool enable)
4431 {
4432         uint32_t data, def;
4433
4434         if (adev->asic_type == CHIP_ARCTURUS)
4435                 return;
4436
4437         amdgpu_gfx_rlc_enter_safe_mode(adev);
4438
4439         /* Enable 3D CGCG/CGLS */
4440         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4441                 /* write cmd to clear cgcg/cgls ov */
4442                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4443                 /* unset CGCG override */
4444                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4445                 /* update CGCG and CGLS override bits */
4446                 if (def != data)
4447                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4448
4449                 /* enable 3Dcgcg FSM(0x0000363f) */
4450                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4451
4452                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4453                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4454                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4455                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4456                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4457                 if (def != data)
4458                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4459
4460                 /* set IDLE_POLL_COUNT(0x00900100) */
4461                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4462                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4463                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4464                 if (def != data)
4465                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4466         } else {
4467                 /* Disable 3D CGCG/CGLS */
4468                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4469                 /* disable cgcg, cgls should be disabled */
4470                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4471                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4472                 /* disable cgcg and cgls in FSM */
4473                 if (def != data)
4474                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4475         }
4476
4477         amdgpu_gfx_rlc_exit_safe_mode(adev);
4478 }
4479
4480 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4481                                                       bool enable)
4482 {
4483         uint32_t def, data;
4484
4485         amdgpu_gfx_rlc_enter_safe_mode(adev);
4486
4487         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4488                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4489                 /* unset CGCG override */
4490                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4491                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4492                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4493                 else
4494                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4495                 /* update CGCG and CGLS override bits */
4496                 if (def != data)
4497                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4498
4499                 /* enable cgcg FSM(0x0000363F) */
4500                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4501
4502                 if (adev->asic_type == CHIP_ARCTURUS)
4503                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4504                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4505                 else
4506                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4507                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4508                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4509                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4510                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4511                 if (def != data)
4512                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4513
4514                 /* set IDLE_POLL_COUNT(0x00900100) */
4515                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4516                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4517                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4518                 if (def != data)
4519                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4520         } else {
4521                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4522                 /* reset CGCG/CGLS bits */
4523                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4524                 /* disable cgcg and cgls in FSM */
4525                 if (def != data)
4526                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4527         }
4528
4529         amdgpu_gfx_rlc_exit_safe_mode(adev);
4530 }
4531
4532 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4533                                             bool enable)
4534 {
4535         if (enable) {
4536                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4537                  * ===  MGCG + MGLS ===
4538                  */
4539                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4540                 /* ===  CGCG /CGLS for GFX 3D Only === */
4541                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4542                 /* ===  CGCG + CGLS === */
4543                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4544         } else {
4545                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4546                  * ===  CGCG + CGLS ===
4547                  */
4548                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4549                 /* ===  CGCG /CGLS for GFX 3D Only === */
4550                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4551                 /* ===  MGCG + MGLS === */
4552                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4553         }
4554         return 0;
4555 }
4556
4557 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4558         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4559         .set_safe_mode = gfx_v9_0_set_safe_mode,
4560         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4561         .init = gfx_v9_0_rlc_init,
4562         .get_csb_size = gfx_v9_0_get_csb_size,
4563         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4564         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4565         .resume = gfx_v9_0_rlc_resume,
4566         .stop = gfx_v9_0_rlc_stop,
4567         .reset = gfx_v9_0_rlc_reset,
4568         .start = gfx_v9_0_rlc_start
4569 };
4570
4571 static int gfx_v9_0_set_powergating_state(void *handle,
4572                                           enum amd_powergating_state state)
4573 {
4574         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4575         bool enable = (state == AMD_PG_STATE_GATE);
4576
4577         switch (adev->asic_type) {
4578         case CHIP_RAVEN:
4579         case CHIP_RENOIR:
4580                 if (!enable) {
4581                         amdgpu_gfx_off_ctrl(adev, false);
4582                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4583                 }
4584                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4585                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4586                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4587                 } else {
4588                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4589                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4590                 }
4591
4592                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4593                         gfx_v9_0_enable_cp_power_gating(adev, true);
4594                 else
4595                         gfx_v9_0_enable_cp_power_gating(adev, false);
4596
4597                 /* update gfx cgpg state */
4598                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4599
4600                 /* update mgcg state */
4601                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4602
4603                 if (enable)
4604                         amdgpu_gfx_off_ctrl(adev, true);
4605                 break;
4606         case CHIP_VEGA12:
4607                 if (!enable) {
4608                         amdgpu_gfx_off_ctrl(adev, false);
4609                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4610                 } else {
4611                         amdgpu_gfx_off_ctrl(adev, true);
4612                 }
4613                 break;
4614         default:
4615                 break;
4616         }
4617
4618         return 0;
4619 }
4620
4621 static int gfx_v9_0_set_clockgating_state(void *handle,
4622                                           enum amd_clockgating_state state)
4623 {
4624         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4625
4626         if (amdgpu_sriov_vf(adev))
4627                 return 0;
4628
4629         switch (adev->asic_type) {
4630         case CHIP_VEGA10:
4631         case CHIP_VEGA12:
4632         case CHIP_VEGA20:
4633         case CHIP_RAVEN:
4634         case CHIP_ARCTURUS:
4635         case CHIP_RENOIR:
4636                 gfx_v9_0_update_gfx_clock_gating(adev,
4637                                                  state == AMD_CG_STATE_GATE);
4638                 break;
4639         default:
4640                 break;
4641         }
4642         return 0;
4643 }
4644
4645 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4646 {
4647         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4648         int data;
4649
4650         if (amdgpu_sriov_vf(adev))
4651                 *flags = 0;
4652
4653         /* AMD_CG_SUPPORT_GFX_MGCG */
4654         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4655         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4656                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4657
4658         /* AMD_CG_SUPPORT_GFX_CGCG */
4659         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4660         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4661                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4662
4663         /* AMD_CG_SUPPORT_GFX_CGLS */
4664         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4665                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4666
4667         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4668         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4669         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4670                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4671
4672         /* AMD_CG_SUPPORT_GFX_CP_LS */
4673         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4674         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4675                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4676
4677         if (adev->asic_type != CHIP_ARCTURUS) {
4678                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4679                 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4680                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4681                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4682
4683                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4684                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4685                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4686         }
4687 }
4688
4689 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4690 {
4691         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4692 }
4693
4694 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4695 {
4696         struct amdgpu_device *adev = ring->adev;
4697         u64 wptr;
4698
4699         /* XXX check if swapping is necessary on BE */
4700         if (ring->use_doorbell) {
4701                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4702         } else {
4703                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4704                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4705         }
4706
4707         return wptr;
4708 }
4709
4710 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4711 {
4712         struct amdgpu_device *adev = ring->adev;
4713
4714         if (ring->use_doorbell) {
4715                 /* XXX check if swapping is necessary on BE */
4716                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4717                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4718         } else {
4719                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4720                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4721         }
4722 }
4723
4724 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4725 {
4726         struct amdgpu_device *adev = ring->adev;
4727         u32 ref_and_mask, reg_mem_engine;
4728         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
4729
4730         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4731                 switch (ring->me) {
4732                 case 1:
4733                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4734                         break;
4735                 case 2:
4736                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4737                         break;
4738                 default:
4739                         return;
4740                 }
4741                 reg_mem_engine = 0;
4742         } else {
4743                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4744                 reg_mem_engine = 1; /* pfp */
4745         }
4746
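        /* write ref_and_mask to the HDP flush request register and poll the
         * matching bit in the done register until the flush has completed
         */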
4747         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4748                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
4749                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
4750                               ref_and_mask, ref_and_mask, 0x20);
4751 }
4752
4753 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4754                                         struct amdgpu_job *job,
4755                                         struct amdgpu_ib *ib,
4756                                         uint32_t flags)
4757 {
4758         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4759         u32 header, control = 0;
4760
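        /* constant-engine IBs use the INDIRECT_BUFFER_CONST opcode; the
         * control word packs the IB length in dwords with the vmid in the
         * top byte
         */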
4761         if (ib->flags & AMDGPU_IB_FLAG_CE)
4762                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4763         else
4764                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4765
4766         control |= ib->length_dw | (vmid << 24);
4767
4768         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4769                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4770
4771                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4772                         gfx_v9_0_ring_emit_de_meta(ring);
4773         }
4774
4775         amdgpu_ring_write(ring, header);
4776         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4777         amdgpu_ring_write(ring,
4778 #ifdef __BIG_ENDIAN
4779                 (2 << 0) |
4780 #endif
4781                 lower_32_bits(ib->gpu_addr));
4782         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4783         amdgpu_ring_write(ring, control);
4784 }
4785
4786 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4787                                           struct amdgpu_job *job,
4788                                           struct amdgpu_ib *ib,
4789                                           uint32_t flags)
4790 {
4791         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4792         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4793
4794         /* Currently there is a high likelihood of a wave ID mismatch
4795          * between ME and GDS, leading to a hw deadlock, because ME
4796          * generates different wave IDs than the GDS expects. This happens
4797          * randomly when at least 5 compute pipes use GDS ordered append.
4798          * The wave IDs generated by ME are also wrong after suspend/resume.
4799          * Those are probably bugs somewhere else in the kernel driver.
4800          *
4801          * Writing GDS_COMPUTE_MAX_WAVE_ID resets the wave ID counters in
4802          * ME and GDS to 0 for this ring (me/pipe).
4803          */
4804         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4805                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4806                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4807                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4808         }
4809
4810         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4811         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4812         amdgpu_ring_write(ring,
4813 #ifdef __BIG_ENDIAN
4814                                 (2 << 0) |
4815 #endif
4816                                 lower_32_bits(ib->gpu_addr));
4817         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4818         amdgpu_ring_write(ring, control);
4819 }
4820
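/*
 * Emit a fence via RELEASE_MEM: flush/invalidate the relevant caches at
 * EOP, write the fence sequence number (64-bit or only the low 32 bits,
 * depending on AMDGPU_FENCE_FLAG_64BIT) to @addr, and optionally raise an
 * interrupt when AMDGPU_FENCE_FLAG_INT is set.
 */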
4821 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4822                                      u64 seq, unsigned flags)
4823 {
4824         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4825         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4826         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4827
4828         /* RELEASE_MEM - flush caches, send int */
4829         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4830         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4831                                                EOP_TC_NC_ACTION_EN) :
4832                                               (EOP_TCL1_ACTION_EN |
4833                                                EOP_TC_ACTION_EN |
4834                                                EOP_TC_WB_ACTION_EN |
4835                                                EOP_TC_MD_ACTION_EN)) |
4836                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4837                                  EVENT_INDEX(5)));
4838         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4839
4840         /*
4841          * The address must be Qword aligned for a 64-bit write, or Dword
4842          * aligned when only the low 32 bits are written (data high is discarded).
4843          */
4844         if (write64bit)
4845                 BUG_ON(addr & 0x7);
4846         else
4847                 BUG_ON(addr & 0x3);
4848         amdgpu_ring_write(ring, lower_32_bits(addr));
4849         amdgpu_ring_write(ring, upper_32_bits(addr));
4850         amdgpu_ring_write(ring, lower_32_bits(seq));
4851         amdgpu_ring_write(ring, upper_32_bits(seq));
4852         amdgpu_ring_write(ring, 0);
4853 }
4854
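/*
 * Stall the ring until earlier work has signalled: poll the ring's fence
 * writeback address until the last emitted sequence number (sync_seq) is
 * visible.  On the GFX ring the wait is issued on the PFP engine.
 */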
4855 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4856 {
4857         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4858         uint32_t seq = ring->fence_drv.sync_seq;
4859         uint64_t addr = ring->fence_drv.gpu_addr;
4860
4861         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4862                               lower_32_bits(addr), upper_32_bits(addr),
4863                               seq, 0xffffffff, 4);
4864 }
4865
4866 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4867                                         unsigned vmid, uint64_t pd_addr)
4868 {
4869         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4870
4871         /* compute doesn't have PFP */
4872         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4873                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4874                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4875                 amdgpu_ring_write(ring, 0x0);
4876         }
4877 }
4878
4879 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4880 {
4881         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware uses a 32bit rptr */
4882 }
4883
4884 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4885 {
4886         u64 wptr;
4887
4888         /* XXX check if swapping is necessary on BE */
4889         if (ring->use_doorbell)
4890                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4891         else
4892                 BUG();
4893         return wptr;
4894 }
4895
4896 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4897                                            bool acquire)
4898 {
4899         struct amdgpu_device *adev = ring->adev;
4900         int pipe_num, tmp, reg;
4901         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4902
4903         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4904
4905         /* first me only has 2 entries, GFX and HP3D */
4906         if (ring->me > 0)
4907                 pipe_num -= 2;
4908
4909         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4910         tmp = RREG32(reg);
4911         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4912         WREG32(reg, tmp);
4913 }
4914
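/*
 * Compute pipe reservation for high-priority queues: when a ring acquires
 * priority, its (me, pipe) bit is set in pipe_reserve_bitmap and every pipe
 * without a reservation has SPI_WCL_PIPE_PERCENT throttled to the minimum
 * (0x1); once the bitmap is empty again, all gfx and compute pipes are
 * restored to full SPI_WCL_PIPE_PERCENT.
 */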
4915 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4916                                             struct amdgpu_ring *ring,
4917                                             bool acquire)
4918 {
4919         int i, pipe;
4920         bool reserve;
4921         struct amdgpu_ring *iring;
4922
4923         mutex_lock(&adev->gfx.pipe_reserve_mutex);
4924         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
4925         if (acquire)
4926                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4927         else
4928                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4929
4930         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4931                 /* Clear all reservations - everyone reacquires all resources */
4932                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4933                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4934                                                        true);
4935
4936                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4937                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4938                                                        true);
4939         } else {
4940                 /* Lower all pipes without a current reservation */
4941                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4942                         iring = &adev->gfx.gfx_ring[i];
4943                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4944                                                            iring->me,
4945                                                            iring->pipe,
4946                                                            0);
4947                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4948                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4949                 }
4950
4951                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4952                         iring = &adev->gfx.compute_ring[i];
4953                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4954                                                            iring->me,
4955                                                            iring->pipe,
4956                                                            0);
4957                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4958                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4959                 }
4960         }
4961
4962         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4963 }
4964
4965 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4966                                       struct amdgpu_ring *ring,
4967                                       bool acquire)
4968 {
4969         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4970         uint32_t queue_priority = acquire ? 0xf : 0x0;
4971
4972         mutex_lock(&adev->srbm_mutex);
4973         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4974
4975         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4976         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4977
4978         soc15_grbm_select(adev, 0, 0, 0, 0);
4979         mutex_unlock(&adev->srbm_mutex);
4980 }
4981
4982 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4983                                                enum drm_sched_priority priority)
4984 {
4985         struct amdgpu_device *adev = ring->adev;
4986         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4987
4988         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4989                 return;
4990
4991         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4992         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4993 }
4994
4995 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4996 {
4997         struct amdgpu_device *adev = ring->adev;
4998
4999         /* XXX check if swapping is necessary on BE */
5000         if (ring->use_doorbell) {
5001                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5002                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5003         } else {
5004                 BUG(); /* only DOORBELL method supported on gfx9 now */
5005         }
5006 }
5007
5008 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5009                                          u64 seq, unsigned int flags)
5010 {
5011         struct amdgpu_device *adev = ring->adev;
5012
5013         /* we only allocate 32bit for each seq wb address */
5014         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5015
5016         /* write fence seq to the "addr" */
5017         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5018         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5019                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5020         amdgpu_ring_write(ring, lower_32_bits(addr));
5021         amdgpu_ring_write(ring, upper_32_bits(addr));
5022         amdgpu_ring_write(ring, lower_32_bits(seq));
5023
5024         if (flags & AMDGPU_FENCE_FLAG_INT) {
5025                 /* set register to trigger INT */
5026                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5027                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5028                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5029                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5030                 amdgpu_ring_write(ring, 0);
5031                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5032         }
5033 }
5034
5035 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5036 {
5037         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5038         amdgpu_ring_write(ring, 0);
5039 }
5040
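/*
 * CE/DE metadata writes (SR-IOV): a zero-initialized v9_ce_ib_state /
 * v9_de_ib_state snapshot is written via WRITE_DATA into the ring's CSA
 * (context save area), so the CP has valid state around preemption.  The
 * DE payload also records a GDS backup address one page (4096 bytes)
 * above the CSA base.
 */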
5041 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5042 {
5043         struct v9_ce_ib_state ce_payload = {0};
5044         uint64_t csa_addr;
5045         int cnt;
5046
5047         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5048         csa_addr = amdgpu_csa_vaddr(ring->adev);
5049
5050         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5051         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5052                                  WRITE_DATA_DST_SEL(8) |
5053                                  WR_CONFIRM) |
5054                                  WRITE_DATA_CACHE_POLICY(0));
5055         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5056         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5057         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5058 }
5059
5060 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5061 {
5062         struct v9_de_ib_state de_payload = {0};
5063         uint64_t csa_addr, gds_addr;
5064         int cnt;
5065
5066         csa_addr = amdgpu_csa_vaddr(ring->adev);
5067         gds_addr = csa_addr + 4096;
5068         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5069         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5070
5071         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5072         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5073         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5074                                  WRITE_DATA_DST_SEL(8) |
5075                                  WR_CONFIRM) |
5076                                  WRITE_DATA_CACHE_POLICY(0));
5077         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5078         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5079         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5080 }
5081
5082 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5083 {
5084         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5085         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5086 }
5087
5088 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5089 {
5090         uint32_t dw2 = 0;
5091
5092         if (amdgpu_sriov_vf(ring->adev))
5093                 gfx_v9_0_ring_emit_ce_meta(ring);
5094
5095         gfx_v9_0_ring_emit_tmz(ring, true);
5096
5097         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5098         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5099                 /* set load_global_config & load_global_uconfig */
5100                 dw2 |= 0x8001;
5101                 /* set load_cs_sh_regs */
5102                 dw2 |= 0x01000000;
5103                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5104                 dw2 |= 0x10002;
5105
5106                 /* set load_ce_ram if preamble presented */
5107                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5108                         dw2 |= 0x10000000;
5109         } else {
5110                 /* still load_ce_ram if the preamble is presented for the
5111                  * first time, even though no context switch happens.
5112                  */
5113                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5114                         dw2 |= 0x10000000;
5115         }
5116
5117         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5118         amdgpu_ring_write(ring, dw2);
5119         amdgpu_ring_write(ring, 0);
5120 }
5121
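/*
 * Conditional execution: emit a COND_EXEC packet whose "number of dwords
 * to skip" field is filled with the placeholder 0x55aa55aa; the offset of
 * that dword is returned so gfx_v9_0_ring_emit_patch_cond_exec() can patch
 * in the real count once the size of the conditional region is known.
 * The region is only executed while *cond_exe_gpu_addr != 0.
 */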
5122 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5123 {
5124         unsigned ret;
5125         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5126         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5127         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5128         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5129         ret = ring->wptr & ring->buf_mask;
5130         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5131         return ret;
5132 }
5133
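/*
 * Patch the COND_EXEC placeholder emitted above with the distance (in
 * dwords) from the placeholder to the current write pointer, handling ring
 * wrap-around.  For example, with a 1024-dword ring (buf_mask 0x3ff), a
 * placeholder at offset 1020 and cur == 10 gives 1024 - 1020 + 10 = 14
 * dwords to skip.
 */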
5134 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5135 {
5136         unsigned cur;
5137         BUG_ON(offset > ring->buf_mask);
5138         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5139
5140         cur = (ring->wptr & ring->buf_mask) - 1;
5141         if (likely(cur > offset))
5142                 ring->ring[offset] = cur - offset;
5143         else
5144                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5145 }
5146
5147 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5148 {
5149         struct amdgpu_device *adev = ring->adev;
5150
5151         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5152         amdgpu_ring_write(ring, 0 |     /* src: register*/
5153                                 (5 << 8) |      /* dst: memory */
5154                                 (1 << 20));     /* write confirm */
5155         amdgpu_ring_write(ring, reg);
5156         amdgpu_ring_write(ring, 0);
5157         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5158                                 adev->virt.reg_val_offs * 4));
5159         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5160                                 adev->virt.reg_val_offs * 4));
5161 }
5162
5163 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5164                                     uint32_t val)
5165 {
5166         uint32_t cmd = 0;
5167
5168         switch (ring->funcs->type) {
5169         case AMDGPU_RING_TYPE_GFX:
5170                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5171                 break;
5172         case AMDGPU_RING_TYPE_KIQ:
5173                 cmd = (1 << 16); /* no inc addr */
5174                 break;
5175         default:
5176                 cmd = WR_CONFIRM;
5177                 break;
5178         }
5179         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5180         amdgpu_ring_write(ring, cmd);
5181         amdgpu_ring_write(ring, reg);
5182         amdgpu_ring_write(ring, 0);
5183         amdgpu_ring_write(ring, val);
5184 }
5185
5186 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5187                                         uint32_t val, uint32_t mask)
5188 {
5189         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5190 }
5191
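/*
 * Write @reg0 and then wait until (@reg1 & mask) == ref.  Newer CP
 * firmware (me_fw_write_wait / mec_fw_write_wait) can do this in a single
 * WAIT_REG_MEM packet; otherwise fall back to the generic helper, which
 * emits a separate register write followed by a wait.
 */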
5192 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5193                                                   uint32_t reg0, uint32_t reg1,
5194                                                   uint32_t ref, uint32_t mask)
5195 {
5196         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5197         struct amdgpu_device *adev = ring->adev;
5198         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5199                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5200
5201         if (fw_version_ok)
5202                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5203                                       ref, mask, 0x20);
5204         else
5205                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5206                                                            ref, mask);
5207 }
5208
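/*
 * Soft recovery: issue an SQ_CMD targeting the waves that belong to the
 * given VMID (CHECK_VMID + VM_ID), letting the driver attempt to kill the
 * hung job's waves instead of escalating straight to a full GPU reset.
 */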
5209 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5210 {
5211         struct amdgpu_device *adev = ring->adev;
5212         uint32_t value = 0;
5213
5214         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5215         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5216         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5217         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5218         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5219 }
5220
5221 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5222                                                  enum amdgpu_interrupt_state state)
5223 {
5224         switch (state) {
5225         case AMDGPU_IRQ_STATE_DISABLE:
5226         case AMDGPU_IRQ_STATE_ENABLE:
5227                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5228                                TIME_STAMP_INT_ENABLE,
5229                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5230                 break;
5231         default:
5232                 break;
5233         }
5234 }
5235
5236 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5237                                                      int me, int pipe,
5238                                                      enum amdgpu_interrupt_state state)
5239 {
5240         u32 mec_int_cntl, mec_int_cntl_reg;
5241
5242         /*
5243          * amdgpu controls only the first MEC. That's why this function only
5244          * handles the setting of interrupts for this specific MEC. All other
5245          * pipes' interrupts are set by amdkfd.
5246          */
5247
5248         if (me == 1) {
5249                 switch (pipe) {
5250                 case 0:
5251                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5252                         break;
5253                 case 1:
5254                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5255                         break;
5256                 case 2:
5257                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5258                         break;
5259                 case 3:
5260                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5261                         break;
5262                 default:
5263                         DRM_DEBUG("invalid pipe %d\n", pipe);
5264                         return;
5265                 }
5266         } else {
5267                 DRM_DEBUG("invalid me %d\n", me);
5268                 return;
5269         }
5270
5271         switch (state) {
5272         case AMDGPU_IRQ_STATE_DISABLE:
5273                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5274                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5275                                              TIME_STAMP_INT_ENABLE, 0);
5276                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5277                 break;
5278         case AMDGPU_IRQ_STATE_ENABLE:
5279                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5280                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5281                                              TIME_STAMP_INT_ENABLE, 1);
5282                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5283                 break;
5284         default:
5285                 break;
5286         }
5287 }
5288
5289 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5290                                              struct amdgpu_irq_src *source,
5291                                              unsigned type,
5292                                              enum amdgpu_interrupt_state state)
5293 {
5294         switch (state) {
5295         case AMDGPU_IRQ_STATE_DISABLE:
5296         case AMDGPU_IRQ_STATE_ENABLE:
5297                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5298                                PRIV_REG_INT_ENABLE,
5299                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5300                 break;
5301         default:
5302                 break;
5303         }
5304
5305         return 0;
5306 }
5307
5308 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5309                                               struct amdgpu_irq_src *source,
5310                                               unsigned type,
5311                                               enum amdgpu_interrupt_state state)
5312 {
5313         switch (state) {
5314         case AMDGPU_IRQ_STATE_DISABLE:
5315         case AMDGPU_IRQ_STATE_ENABLE:
5316                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5317                                PRIV_INSTR_INT_ENABLE,
5318                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
5319         default:
5320                 break;
5321         }
5322
5323         return 0;
5324 }
5325
5326 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5327         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5328                         CP_ECC_ERROR_INT_ENABLE, 1)
5329
5330 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5331         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5332                         CP_ECC_ERROR_INT_ENABLE, 0)
5333
5334 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5335                                               struct amdgpu_irq_src *source,
5336                                               unsigned type,
5337                                               enum amdgpu_interrupt_state state)
5338 {
5339         switch (state) {
5340         case AMDGPU_IRQ_STATE_DISABLE:
5341                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5342                                 CP_ECC_ERROR_INT_ENABLE, 0);
5343                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5344                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5345                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5346                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5347                 break;
5348
5349         case AMDGPU_IRQ_STATE_ENABLE:
5350                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5351                                 CP_ECC_ERROR_INT_ENABLE, 1);
5352                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5353                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5354                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5355                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5356                 break;
5357         default:
5358                 break;
5359         }
5360
5361         return 0;
5362 }
5363
5364
5365 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5366                                             struct amdgpu_irq_src *src,
5367                                             unsigned type,
5368                                             enum amdgpu_interrupt_state state)
5369 {
5370         switch (type) {
5371         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5372                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5373                 break;
5374         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5375                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5376                 break;
5377         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5378                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5379                 break;
5380         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5381                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5382                 break;
5383         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5384                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5385                 break;
5386         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5387                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5388                 break;
5389         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5390                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5391                 break;
5392         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5393                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5394                 break;
5395         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5396                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5397                 break;
5398         default:
5399                 break;
5400         }
5401         return 0;
5402 }
5403
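/*
 * CP end-of-pipe interrupt handler.  entry->ring_id encodes the source:
 * ME in bits [3:2], pipe in bits [1:0] and queue in bits [6:4].  ME 0 is
 * the gfx ring; ME 1/2 are the compute MECs, whose matching ring has its
 * fences processed.
 */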
5404 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5405                             struct amdgpu_irq_src *source,
5406                             struct amdgpu_iv_entry *entry)
5407 {
5408         int i;
5409         u8 me_id, pipe_id, queue_id;
5410         struct amdgpu_ring *ring;
5411
5412         DRM_DEBUG("IH: CP EOP\n");
5413         me_id = (entry->ring_id & 0x0c) >> 2;
5414         pipe_id = (entry->ring_id & 0x03) >> 0;
5415         queue_id = (entry->ring_id & 0x70) >> 4;
5416
5417         switch (me_id) {
5418         case 0:
5419                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5420                 break;
5421         case 1:
5422         case 2:
5423                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5424                         ring = &adev->gfx.compute_ring[i];
5425                         /* Per-queue interrupt is supported for MEC starting from VI.
5426                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5427                          */
5428                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5429                                 amdgpu_fence_process(ring);
5430                 }
5431                 break;
5432         }
5433         return 0;
5434 }
5435
5436 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5437                            struct amdgpu_iv_entry *entry)
5438 {
5439         u8 me_id, pipe_id, queue_id;
5440         struct amdgpu_ring *ring;
5441         int i;
5442
5443         me_id = (entry->ring_id & 0x0c) >> 2;
5444         pipe_id = (entry->ring_id & 0x03) >> 0;
5445         queue_id = (entry->ring_id & 0x70) >> 4;
5446
5447         switch (me_id) {
5448         case 0:
5449                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5450                 break;
5451         case 1:
5452         case 2:
5453                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5454                         ring = &adev->gfx.compute_ring[i];
5455                         if (ring->me == me_id && ring->pipe == pipe_id &&
5456                             ring->queue == queue_id)
5457                                 drm_sched_fault(&ring->sched);
5458                 }
5459                 break;
5460         }
5461 }
5462
5463 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5464                                  struct amdgpu_irq_src *source,
5465                                  struct amdgpu_iv_entry *entry)
5466 {
5467         DRM_ERROR("Illegal register access in command stream\n");
5468         gfx_v9_0_fault(adev, entry);
5469         return 0;
5470 }
5471
5472 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5473                                   struct amdgpu_irq_src *source,
5474                                   struct amdgpu_iv_entry *entry)
5475 {
5476         DRM_ERROR("Illegal instruction in command stream\n");
5477         gfx_v9_0_fault(adev, entry);
5478         return 0;
5479 }
5480
5481
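/*
 * Map of GFX EDC error counter registers to the bit fields holding their
 * SEC (single-error corrected) and DED (double-error detected) counts.
 * Sub-blocks that only report a single-error-detected (SED) count leave
 * the DED entry as 0, 0.
 */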
5482 static const struct ras_gfx_subblock_reg ras_subblock_regs[] = {
5483         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5484           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5485           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5486         },
5487         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5488           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5489           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5490         },
5491         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5492           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5493           0, 0
5494         },
5495         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5496           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5497           0, 0
5498         },
5499         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5500           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5501           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5502         },
5503         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5504           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5505           0, 0
5506         },
5507         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5508           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5509           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5510         },
5511         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5512           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5513           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5514         },
5515         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
5516           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
5517           0, 0
5518         },
5519         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
5520           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
5521           0, 0
5522         },
5523         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
5524           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
5525           0, 0
5526         },
5527         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5528           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
5529           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
5530         },
5531         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
5532           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
5533           0, 0
5534         },
5535         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5536           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5537           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
5538         },
5539         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5540           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5541           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5542           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
5543         },
5544         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5545           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5546           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
5547           0, 0
5548         },
5549         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5550           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5551           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5552           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
5553         },
5554         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5555           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5556           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5557           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
5558         },
5559         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5560           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5561           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5562           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
5563         },
5564         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5565           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
5566           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5567           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
5568         },
5569         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
5570           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
5571           0, 0
5572         },
5573         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5574           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5575           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
5576         },
5577         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5578           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
5579           0, 0
5580         },
5581         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5582           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
5583           0, 0
5584         },
5585         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5586           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
5587           0, 0
5588         },
5589         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
5590           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
5591           0, 0
5592         },
5593         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5594           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
5595           0, 0
5596         },
5597         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
5598           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
5599           0, 0
5600         },
5601         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5602           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5603           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
5604         },
5605         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5606           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5607           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
5608         },
5609         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5610           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5611           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
5612         },
5613         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5614           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5615           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
5616         },
5617         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5618           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5619           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
5620         },
5621         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5622           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
5623           0, 0
5624         },
5625         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5626           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
5627           0, 0
5628         },
5629         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5630           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
5631           0, 0
5632         },
5633         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5634           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
5635           0, 0
5636         },
5637         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5638           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
5639           0, 0
5640         },
5641         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
5642           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
5643           0, 0
5644         },
5645         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5646           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
5647           0, 0
5648         },
5649         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5650           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
5651           0, 0
5652         },
5653         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5654           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
5655           0, 0
5656         },
5657         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5658           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5659           0, 0
5660         },
5661         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5662           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
5663           0, 0
5664         },
5665         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5666           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5667           0, 0
5668         },
5669         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5670           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
5671           0, 0
5672         },
5673         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
5674           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
5675           0, 0
5676         },
5677         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5678           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5679           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
5680         },
5681         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5682           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5683           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
5684         },
5685         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5686           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
5687           0, 0
5688         },
5689         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5690           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
5691           0, 0
5692         },
5693         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5694           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
5695           0, 0
5696         },
5697         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5698           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5699           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
5700         },
5701         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
5702           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5703           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
5704         },
5705         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5706           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5707           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
5708         },
5709         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5710           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5711           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
5712         },
5713         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
5714           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
5715           0, 0
5716         },
5717         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5718           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5719           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
5720         },
5721         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5722           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5723           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
5724         },
5725         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5726           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
5727           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
5728         },
5729         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5730           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5731           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
5732         },
5733         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5734           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5735           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
5736         },
5737         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5738           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5739           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
5740         },
5741         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
5742           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5743           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
5744         },
5745         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5746           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5747           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
5748         },
5749         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5750           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5751           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
5752         },
5753         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5754           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5755           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
5756         },
5757         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5758           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5759           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
5760         },
5761         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5762           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5763           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
5764         },
5765         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5766           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5767           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
5768         },
5769         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5770           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5771           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
5772         },
5773         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5774           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5775           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
5776         },
5777         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5778           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5779           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
5780         },
5781         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5782           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5783           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
5784         },
5785         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5786           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5787           0, 0
5788         },
5789         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5790           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
5791           0, 0
5792         },
5793         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5794           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
5795           0, 0
5796         },
5797         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5798           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
5799           0, 0
5800         },
5801         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5802           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
5803           0, 0
5804         },
5805         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
5806           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5807           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
5808         },
5809         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5810           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5811           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
5812         },
5813         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5814           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5815           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
5816         },
5817         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5818           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5819           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
5820         },
5821         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5822           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5823           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
5824         },
5825         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5826           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5827           0, 0
5828         },
5829         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5830           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
5831           0, 0
5832         },
5833         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5834           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
5835           0, 0
5836         },
5837         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5838           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
5839           0, 0
5840         },
5841         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
5842           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
5843           0, 0
5844         },
5845         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5846           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5847           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
5848         },
5849         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5850           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5851           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
5852         },
5853         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5854           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5855           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
5856         },
5857         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5858           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5859           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
5860         },
5861         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5862           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5863           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
5864         },
5865         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5866           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
5867           0, 0
5868         },
5869         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5870           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
5871           0, 0
5872         },
5873         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5874           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
5875           0, 0
5876         },
5877         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5878           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
5879           0, 0
5880         },
5881         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
5882           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
5883           0, 0
5884         },
5885         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5886           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
5887           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
5888         },
5889         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5890           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
5891           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
5892         },
5893         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5894           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
5895           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
5896         },
5897         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5898           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
5899           0, 0
5900         },
5901         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5902           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
5903           0, 0
5904         },
5905         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5906           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
5907           0, 0
5908         },
5909         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5910           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
5911           0, 0
5912         },
5913         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5914           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
5915           0, 0
5916         },
5917         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
5918           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
5919           0, 0
5920         }
5921 };
5922
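/*
 * RAS error injection (Vega20 only): validate the requested GFX sub-block
 * and error type against the ras_gfx_subblocks capability table, translate
 * them into a TA request and ask the PSP RAS trusted application to
 * trigger the error.
 */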
5923 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
5924                                      void *inject_if)
5925 {
5926         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
5927         int ret;
5928         struct ta_ras_trigger_error_input block_info = { 0 };
5929
5930         if (adev->asic_type != CHIP_VEGA20)
5931                 return -EINVAL;
5932
5933         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
5934                 return -EINVAL;
5935
5936         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
5937                 return -EPERM;
5938
5939         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
5940               info->head.type)) {
5941                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
5942                         ras_gfx_subblocks[info->head.sub_block_index].name,
5943                         info->head.type);
5944                 return -EPERM;
5945         }
5946
5947         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
5948               info->head.type)) {
5949                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
5950                         ras_gfx_subblocks[info->head.sub_block_index].name,
5951                         info->head.type);
5952                 return -EPERM;
5953         }
5954
5955         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
5956         block_info.sub_block_index =
5957                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
5958         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
5959         block_info.address = info->address;
5960         block_info.value = info->value;
5961
5962         mutex_lock(&adev->grbm_idx_mutex);
5963         ret = psp_ras_trigger_error(&adev->psp, &block_info);
5964         mutex_unlock(&adev->grbm_idx_mutex);
5965
5966         return ret;
5967 }
5968
5969 static const char *vml2_mems[] = {
5970         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
5971         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
5972         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
5973         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
5974         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
5975         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
5976         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
5977         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
5978         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
5979         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
5980         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
5981         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
5982         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
5983         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
5984         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
5985         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
5986 };
5987
5988 static const char *vml2_walker_mems[] = {
5989         "UTC_VML2_CACHE_PDE0_MEM0",
5990         "UTC_VML2_CACHE_PDE0_MEM1",
5991         "UTC_VML2_CACHE_PDE1_MEM0",
5992         "UTC_VML2_CACHE_PDE1_MEM1",
5993         "UTC_VML2_CACHE_PDE2_MEM0",
5994         "UTC_VML2_CACHE_PDE2_MEM1",
5995         "UTC_VML2_RDIF_LOG_FIFO",
5996 };
5997
5998 static const char *atc_l2_cache_2m_mems[] = {
5999         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6000         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6001         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6002         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6003 };
6004
6005 static const char *atc_l2_cache_4k_mems[] = {
6006         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6007         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6008         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6009         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6010         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6011         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6012         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6013         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6014         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6015         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6016         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6017         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6018         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6019         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6020         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6021         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6022         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6023         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6024         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6025         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6026         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6027         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6028         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6029         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6030         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6031         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6032         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6033         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6034         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6035         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6036         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6037         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6038 };
6039
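/*
 * Walk the UTC (VML2, VML2 walker and ATC L2) ECC/EDC counters, log any
 * non-zero single-error-corrected (SEC) and double-error-detected (DED)
 * counts, and accumulate them into the RAS correctable/uncorrectable
 * error totals.  The index registers are restored to 255 when done.
 */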
6040 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6041                                          struct ras_err_data *err_data)
6042 {
6043         uint32_t i, data;
6044         uint32_t sec_count, ded_count;
6045
6046         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6047         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6048         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6049         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6050         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6051         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6052         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6053         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6054
6055         for (i = 0; i < 16; i++) {
6056                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6057                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6058
6059                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6060                 if (sec_count) {
6061                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6062                                  vml2_mems[i], sec_count);
6063                         err_data->ce_count += sec_count;
6064                 }
6065
6066                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6067                 if (ded_count) {
6068                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6069                                  vml2_mems[i], ded_count);
6070                         err_data->ue_count += ded_count;
6071                 }
6072         }
6073
6074         for (i = 0; i < 7; i++) {
6075                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6076                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6077
6078                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6079                                                 SEC_COUNT);
6080                 if (sec_count) {
6081                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6082                                  vml2_walker_mems[i], sec_count);
6083                         err_data->ce_count += sec_count;
6084                 }
6085
6086                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6087                                                 DED_COUNT);
6088                 if (ded_count) {
6089                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6090                                  vml2_walker_mems[i], ded_count);
6091                         err_data->ue_count += ded_count;
6092                 }
6093         }
6094
6095         for (i = 0; i < 4; i++) {
6096                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6097                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6098
6099                 sec_count = (data & 0x00006000L) >> 0xd; /* SEC_COUNT, bits 14:13 */
6100                 if (sec_count) {
6101                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6102                                  atc_l2_cache_2m_mems[i], sec_count);
6103                         err_data->ce_count += sec_count;
6104                 }
6105         }
6106
6107         for (i = 0; i < 32; i++) {
6108                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6109                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6110
6111                 sec_count = (data & 0x00006000L) >> 0xd; /* SEC_COUNT, bits 14:13 */
6112                 if (sec_count) {
6113                         DRM_INFO("Instance[%d]: SubBlock %s, SEC %d\n", i,
6114                                  atc_l2_cache_4k_mems[i], sec_count);
6115                         err_data->ce_count += sec_count;
6116                 }
6117
6118                 ded_count = (data & 0x00018000L) >> 0xf; /* DED_COUNT, bits 16:15 */
6119                 if (ded_count) {
6120                         DRM_INFO("Instance[%d]: SubBlock %s, DED %d\n", i,
6121                                  atc_l2_cache_4k_mems[i], ded_count);
6122                         err_data->ue_count += ded_count;
6123                 }
6124         }
6125
6126         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6127         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6128         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6129         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6130
6131         return 0;
6132 }
6133
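/*
 * Decode one SEC/DED counter register value: look up the matching
 * entries in ras_subblock_regs, extract the SEC and DED fields and add
 * them to the running totals for the given SE/instance.
 */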
6134 static int __get_ras_error_count(const struct soc15_reg_entry *reg,
6135         uint32_t se_id, uint32_t inst_id, uint32_t value,
6136         uint32_t *sec_count, uint32_t *ded_count)
6137 {
6138         uint32_t i;
6139         uint32_t sec_cnt, ded_cnt;
6140
6141         for (i = 0; i < ARRAY_SIZE(ras_subblock_regs); i++) {
6142                 if (ras_subblock_regs[i].reg_offset != reg->reg_offset ||
6143                         ras_subblock_regs[i].seg != reg->seg ||
6144                         ras_subblock_regs[i].inst != reg->inst)
6145                         continue;
6146
6147                 sec_cnt = (value &
6148                                 ras_subblock_regs[i].sec_count_mask) >>
6149                                 ras_subblock_regs[i].sec_count_shift;
6150                 if (sec_cnt) {
6151                         DRM_INFO("GFX SubBlock %s, Instance[%d][%d], SEC %d\n",
6152                                 ras_subblock_regs[i].name,
6153                                 se_id, inst_id,
6154                                 sec_cnt);
6155                         *sec_count += sec_cnt;
6156                 }
6157
6158                 ded_cnt = (value &
6159                                 ras_subblock_regs[i].ded_count_mask) >>
6160                                 ras_subblock_regs[i].ded_count_shift;
6161                 if (ded_cnt) {
6162                         DRM_INFO("GFX SubBlock %s, Instance[%d][%d], DED %d\n",
6163                                 ras_subblock_regs[i].name,
6164                                 se_id, inst_id,
6165                                 ded_cnt);
6166                         *ded_count += ded_cnt;
6167                 }
6168         }
6169
6170         return 0;
6171 }
6172
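/*
 * Query the GFX RAS error counts (Vega20 only): read every SEC/DED
 * counter register for each shader engine and instance under
 * grbm_idx_mutex, then add in the UTC EDC status on top.
 */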
6173 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6174                                           void *ras_error_status)
6175 {
6176         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6177         uint32_t sec_count = 0, ded_count = 0;
6178         uint32_t i, j, k;
6179         uint32_t reg_value;
6180
6181         if (adev->asic_type != CHIP_VEGA20)
6182                 return -EINVAL;
6183
6184         err_data->ue_count = 0;
6185         err_data->ce_count = 0;
6186
6187         mutex_lock(&adev->grbm_idx_mutex);
6188
6189         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
6190                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
6191                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
6192                                 gfx_v9_0_select_se_sh(adev, j, 0, k);
6193                                 reg_value =
6194                                         RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
6195                                 if (reg_value)
6196                                         __get_ras_error_count(&sec_ded_counter_registers[i],
6197                                                         j, k, reg_value,
6198                                                         &sec_count, &ded_count);
6199                         }
6200                 }
6201         }
6202
6203         err_data->ce_count += sec_count;
6204         err_data->ue_count += ded_count;
6205
6206         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6207         mutex_unlock(&adev->grbm_idx_mutex);
6208
6209         gfx_v9_0_query_utc_edc_status(adev, err_data);
6210
6211         return 0;
6212 }
6213
6214 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6215         .name = "gfx_v9_0",
6216         .early_init = gfx_v9_0_early_init,
6217         .late_init = gfx_v9_0_late_init,
6218         .sw_init = gfx_v9_0_sw_init,
6219         .sw_fini = gfx_v9_0_sw_fini,
6220         .hw_init = gfx_v9_0_hw_init,
6221         .hw_fini = gfx_v9_0_hw_fini,
6222         .suspend = gfx_v9_0_suspend,
6223         .resume = gfx_v9_0_resume,
6224         .is_idle = gfx_v9_0_is_idle,
6225         .wait_for_idle = gfx_v9_0_wait_for_idle,
6226         .soft_reset = gfx_v9_0_soft_reset,
6227         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6228         .set_powergating_state = gfx_v9_0_set_powergating_state,
6229         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6230 };
6231
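/*
 * Ring callback tables for the GFX, compute and KIQ rings follow;
 * emit_frame_size is the worst-case number of dwords emitted per frame.
 */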
6232 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6233         .type = AMDGPU_RING_TYPE_GFX,
6234         .align_mask = 0xff,
6235         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6236         .support_64bit_ptrs = true,
6237         .vmhub = AMDGPU_GFXHUB_0,
6238         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6239         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6240         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6241         .emit_frame_size = /* a total of 242 dwords maximum if 16 IBs */
6242                 5 +  /* COND_EXEC */
6243                 7 +  /* PIPELINE_SYNC */
6244                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6245                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6246                 2 + /* VM_FLUSH */
6247                 8 +  /* FENCE for VM_FLUSH */
6248                 20 + /* GDS switch */
6249                 4 + /* double SWITCH_BUFFER,
6250                        the first COND_EXEC jumps to the place just
6251                        prior to this double SWITCH_BUFFER */
6252                 5 + /* COND_EXEC */
6253                 7 + /* HDP_flush */
6254                 4 + /* VGT_flush */
6255                 14 + /* CE_META */
6256                 31 + /* DE_META */
6257                 3 + /* CNTX_CTRL */
6258                 5 + /* HDP_INVL */
6259                 8 + 8 + /* FENCE x2 */
6260                 2, /* SWITCH_BUFFER */
6261         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6262         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6263         .emit_fence = gfx_v9_0_ring_emit_fence,
6264         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6265         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6266         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6267         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6268         .test_ring = gfx_v9_0_ring_test_ring,
6269         .test_ib = gfx_v9_0_ring_test_ib,
6270         .insert_nop = amdgpu_ring_insert_nop,
6271         .pad_ib = amdgpu_ring_generic_pad_ib,
6272         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6273         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6274         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6275         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6276         .emit_tmz = gfx_v9_0_ring_emit_tmz,
6277         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6278         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6279         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6280         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6281 };
6282
6283 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6284         .type = AMDGPU_RING_TYPE_COMPUTE,
6285         .align_mask = 0xff,
6286         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6287         .support_64bit_ptrs = true,
6288         .vmhub = AMDGPU_GFXHUB_0,
6289         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6290         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6291         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6292         .emit_frame_size =
6293                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6294                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6295                 5 + /* hdp invalidate */
6296                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6297                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6298                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6299                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6300                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6301         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6302         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6303         .emit_fence = gfx_v9_0_ring_emit_fence,
6304         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6305         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6306         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6307         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6308         .test_ring = gfx_v9_0_ring_test_ring,
6309         .test_ib = gfx_v9_0_ring_test_ib,
6310         .insert_nop = amdgpu_ring_insert_nop,
6311         .pad_ib = amdgpu_ring_generic_pad_ib,
6312         .set_priority = gfx_v9_0_ring_set_priority_compute,
6313         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6314         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6315         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6316 };
6317
6318 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6319         .type = AMDGPU_RING_TYPE_KIQ,
6320         .align_mask = 0xff,
6321         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6322         .support_64bit_ptrs = true,
6323         .vmhub = AMDGPU_GFXHUB_0,
6324         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6325         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6326         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6327         .emit_frame_size =
6328                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6329                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6330                 5 + /* hdp invalidate */
6331                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6332                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6333                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6334                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6335                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6336         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6337         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6338         .test_ring = gfx_v9_0_ring_test_ring,
6339         .insert_nop = amdgpu_ring_insert_nop,
6340         .pad_ib = amdgpu_ring_generic_pad_ib,
6341         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6342         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6343         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6344         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6345 };
6346
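/* Hook the ring callback tables up to the KIQ, GFX and compute rings. */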
6347 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6348 {
6349         int i;
6350
6351         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6352
6353         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6354                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6355
6356         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6357                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6358 }
6359
6360 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6361         .set = gfx_v9_0_set_eop_interrupt_state,
6362         .process = gfx_v9_0_eop_irq,
6363 };
6364
6365 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6366         .set = gfx_v9_0_set_priv_reg_fault_state,
6367         .process = gfx_v9_0_priv_reg_irq,
6368 };
6369
6370 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6371         .set = gfx_v9_0_set_priv_inst_fault_state,
6372         .process = gfx_v9_0_priv_inst_irq,
6373 };
6374
6375 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6376         .set = gfx_v9_0_set_cp_ecc_error_state,
6377         .process = amdgpu_gfx_cp_ecc_error_irq,
6378 };
6379
6380
6381 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6382 {
6383         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6384         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6385
6386         adev->gfx.priv_reg_irq.num_types = 1;
6387         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6388
6389         adev->gfx.priv_inst_irq.num_types = 1;
6390         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6391
6392         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6393         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6394 }
6395
6396 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6397 {
6398         switch (adev->asic_type) {
6399         case CHIP_VEGA10:
6400         case CHIP_VEGA12:
6401         case CHIP_VEGA20:
6402         case CHIP_RAVEN:
6403         case CHIP_ARCTURUS:
6404         case CHIP_RENOIR:
6405                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6406                 break;
6407         default:
6408                 break;
6409         }
6410 }
6411
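/*
 * Set up the per-ASIC GDS parameters: total GDS size, the maximum
 * compute wave id that may allocate GDS, and the fixed GWS/OA sizes.
 */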
6412 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6413 {
6414         /* init asic gds info */
6415         switch (adev->asic_type) {
6416         case CHIP_VEGA10:
6417         case CHIP_VEGA12:
6418         case CHIP_VEGA20:
6419                 adev->gds.gds_size = 0x10000;
6420                 break;
6421         case CHIP_RAVEN:
6422         case CHIP_ARCTURUS:
6423                 adev->gds.gds_size = 0x1000;
6424                 break;
6425         default:
6426                 adev->gds.gds_size = 0x10000;
6427                 break;
6428         }
6429
6430         switch (adev->asic_type) {
6431         case CHIP_VEGA10:
6432         case CHIP_VEGA20:
6433                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6434                 break;
6435         case CHIP_VEGA12:
6436                 adev->gds.gds_compute_max_wave_id = 0x27f;
6437                 break;
6438         case CHIP_RAVEN:
6439                 if (adev->rev_id >= 0x8)
6440                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6441                 else
6442                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6443                 break;
6444         case CHIP_ARCTURUS:
6445                 adev->gds.gds_compute_max_wave_id = 0xfff;
6446                 break;
6447         default:
6448                 /* this really depends on the chip */
6449                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6450                 break;
6451         }
6452
6453         adev->gds.gws_size = 64;
6454         adev->gds.oa_size = 16;
6455 }
6456
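/*
 * Mark CUs as inactive in GC_USER_SHADER_ARRAY_CONFIG for the currently
 * selected SE/SH, based on the user-provided disable bitmap.
 */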
6457 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6458                                                  u32 bitmap)
6459 {
6460         u32 data;
6461
6462         if (!bitmap)
6463                 return;
6464
6465         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6466         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6467
6468         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6469 }
6470
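/*
 * Return the bitmap of active CUs for the currently selected SE/SH:
 * the hardware and user inactive-CU masks are OR'ed together, then
 * inverted and limited to max_cu_per_sh bits.
 */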
6471 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6472 {
6473         u32 data, mask;
6474
6475         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6476         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6477
6478         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6479         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6480
6481         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6482
6483         return (~data) & mask;
6484 }
6485
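/*
 * Fill in cu_info: per-SE/SH active CU bitmaps, the number of active
 * CUs, the always-on CU mask and the number of SIMDs per CU.
 */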
6486 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6487                                  struct amdgpu_cu_info *cu_info)
6488 {
6489         int i, j, k, counter, active_cu_number = 0;
6490         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6491         unsigned disable_masks[4 * 4];
6492
6493         if (!adev || !cu_info)
6494                 return -EINVAL;
6495
6496         /*
6497          * The limit of 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs.
6498          */
6499         if (adev->gfx.config.max_shader_engines *
6500                 adev->gfx.config.max_sh_per_se > 16)
6501                 return -EINVAL;
6502
6503         amdgpu_gfx_parse_disable_cu(disable_masks,
6504                                     adev->gfx.config.max_shader_engines,
6505                                     adev->gfx.config.max_sh_per_se);
6506
6507         mutex_lock(&adev->grbm_idx_mutex);
6508         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6509                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6510                         mask = 1;
6511                         ao_bitmap = 0;
6512                         counter = 0;
6513                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6514                         gfx_v9_0_set_user_cu_inactive_bitmap(
6515                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6516                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6517
6518                         /*
6519                          * The bitmap (and ao_cu_bitmap) in the cu_info structure
6520                          * is a 4x4 array, which suits Vega ASICs with their
6521                          * 4*2 SE/SH layout.
6522                          * For Arcturus the SE/SH layout changes to 8*1; to
6523                          * minimize the impact, we make it compatible with the
6524                          * current bitmap array as below:
6525                          *    SE4,SH0 --> bitmap[0][1]
6526                          *    SE5,SH0 --> bitmap[1][1]
6527                          *    SE6,SH0 --> bitmap[2][1]
6528                          *    SE7,SH0 --> bitmap[3][1]
6529                          */
6530                         cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6531
6532                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6533                                 if (bitmap & mask) {
6534                                         if (counter < adev->gfx.config.max_cu_per_sh)
6535                                                 ao_bitmap |= mask;
6536                                         counter++;
6537                                 }
6538                                 mask <<= 1;
6539                         }
6540                         active_cu_number += counter;
6541                         if (i < 2 && j < 2)
6542                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6543                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6544                 }
6545         }
6546         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6547         mutex_unlock(&adev->grbm_idx_mutex);
6548
6549         cu_info->number = active_cu_number;
6550         cu_info->ao_cu_mask = ao_cu_mask;
6551         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6552
6553         return 0;
6554 }
6555
6556 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6557 {
6558         .type = AMD_IP_BLOCK_TYPE_GFX,
6559         .major = 9,
6560         .minor = 0,
6561         .rev = 0,
6562         .funcs = &gfx_v9_0_ip_funcs,
6563 };