drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

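/* Register and field definitions used locally by this file */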
#define mmPWR_MISC_CNTL_STATUS                                  0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

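/* Arcturus-specific TCP channel steering register offsets */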
#define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
#define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0

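/*
 * GFX RAS sub-block indices used with the TA RAS interface.  The sub-blocks
 * of each hardware unit (CPC, CPF, CPG, GDS, SQ, SQC, TA, TCA, TCC, TCP,
 * TD, EA, UTC) form a contiguous range bracketed by *_INDEX_START and
 * *_INDEX_END markers.
 */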
enum ta_ras_gfx_subblock {
        /*CPC*/
        TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
        TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
        TA_RAS_BLOCK__GFX_CPC_UCODE,
        TA_RAS_BLOCK__GFX_DC_STATE_ME1,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
        TA_RAS_BLOCK__GFX_DC_STATE_ME2,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        /* CPF*/
        TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
        TA_RAS_BLOCK__GFX_CPF_TAG,
        TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
        /* CPG*/
        TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
        TA_RAS_BLOCK__GFX_CPG_TAG,
        TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
        /* GDS*/
        TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        /* SPI*/
        TA_RAS_BLOCK__GFX_SPI_SR_MEM,
        /* SQ*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_LDS_D,
        TA_RAS_BLOCK__GFX_SQ_LDS_I,
        TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
        TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
        /* SQC (3 ranges)*/
        TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        /* SQC range 0*/
        TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
                TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        /* SQC range 1*/
        TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        /* SQC range 2*/
        TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
        /* TA*/
        TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
        TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        /* TCA*/
        TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        /* TCC (5 sub-ranges)*/
        TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        /* TCC range 0*/
        TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
        TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        /* TCC range 1*/
        TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
                TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        /* TCC range 2*/
        TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
        TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
        TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
                TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        /* TCC range 3*/
        TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
                TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        /* TCC range 4*/
        TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
                TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
                TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
        /* TCI*/
        TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
        /* TCP*/
        TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
        TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
        TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
        TA_RAS_BLOCK__GFX_TCP_DB_RAM,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        /* TD*/
        TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
        TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        /* EA (3 sub-ranges)*/
        TA_RAS_BLOCK__GFX_EA_INDEX_START,
        /* EA range 0*/
        TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        /* EA range 1*/
        TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        /* EA range 2*/
        TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
        /* UTC VM L2 bank*/
        TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
        /* UTC VM walker*/
        TA_RAS_BLOCK__UTC_VML2_WALKER,
        /* UTC ATC L2 2MB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
        /* UTC ATC L2 4KB cache*/
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
        TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
        unsigned char *name;
        int ta_subblock;
        int hw_supported_error_type;
        int sw_supported_error_type;
};

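/*
 * Build one ras_gfx_subblock entry: its printable name, the matching
 * ta_ras_gfx_subblock index, and bitmasks packing the (a..d) hardware and
 * (e..h) software supported-error-type flags.
 */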
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                 \
        [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
                #subblock,                                                     \
                TA_RAS_BLOCK__##subblock,                                      \
                ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
                (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
        }

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

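/*
 * Golden register settings: (register, mask, value) triples applied per ASIC
 * by gfx_v9_0_init_golden_registers() via soc15_program_register_sequence().
 */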
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
};

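/*
 * Offsets of the RLC SRM index control address/data registers relative to
 * instance 0, indexed by control register number.
 */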
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
                                          void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
                                     void *inject_if);

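/*
 * Program the per-ASIC golden register sequences.  Renoir returns early and
 * Arcturus is excluded from the common gc_9_x settings applied at the end.
 */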
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_VEGA10:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg10,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg10));
                break;
        case CHIP_VEGA12:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1_vg12,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
                break;
        case CHIP_VEGA20:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg20,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg20));
                break;
        case CHIP_ARCTURUS:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_4_1_arct,
                                                ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
                break;
        case CHIP_RAVEN:
                soc15_program_register_sequence(adev, golden_settings_gc_9_1,
                                                ARRAY_SIZE(golden_settings_gc_9_1));
                if (adev->rev_id >= 8)
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv2,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv2));
                else
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv1,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv1));
                break;
        case CHIP_RENOIR:
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_1_rn,
                                                ARRAY_SIZE(golden_settings_gc_9_1_rn));
                return; /* Renoir does not need the common golden settings */
        default:
                break;
        }

        if (adev->asic_type != CHIP_ARCTURUS)
                soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
                                                (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
        adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

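/*
 * Emit a WRITE_DATA packet that writes @val to register @reg, optionally
 * requesting write confirmation when @wc is set.
 */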
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
                                       bool wc, uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
                                WRITE_DATA_DST_SEL(0) |
                                (wc ? WR_CONFIRM : 0));
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, val);
}

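/*
 * Emit a WAIT_REG_MEM packet that polls a register (mem_space == 0) or a
 * memory location (mem_space == 1) until (value & @mask) equals @ref.
 */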
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
                                  int mem_space, int opt, uint32_t addr0,
                                  uint32_t addr1, uint32_t ref, uint32_t mask,
                                  uint32_t inv)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring,
                                 /* memory (1) or register (0) */
                                 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
                                 WAIT_REG_MEM_OPERATION(opt) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
                                 WAIT_REG_MEM_ENGINE(eng_sel)));

        if (mem_space)
                BUG_ON(addr0 & 0x3); /* Dword align */
        amdgpu_ring_write(ring, addr0);
        amdgpu_ring_write(ring, addr1);
        amdgpu_ring_write(ring, ref);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, inv); /* poll interval */
}

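/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, submit a packet
 * that overwrites it with 0xDEADBEEF, and poll until the new value lands or
 * the timeout expires.
 */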
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r)
                return r;

        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r)
                goto error_free_scratch;

        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                udelay(1);
        }

        if (i >= adev->usec_timeout)
                r = -ETIMEDOUT;

error_free_scratch:
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

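/*
 * Indirect buffer test: the IB writes 0xDEADBEEF to a writeback slot; wait
 * on the fence and then verify that the value actually reached memory.
 */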
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;

        unsigned index;
        uint64_t gpu_addr;
        uint32_t tmp;
        long r;

        r = amdgpu_device_wb_get(adev, &index);
        if (r)
                return r;

        gpu_addr = adev->wb.gpu_addr + (index * 4);
        adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 16, &ib);
        if (r)
                goto err1;

        ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
        ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
        ib.ptr[2] = lower_32_bits(gpu_addr);
        ib.ptr[3] = upper_32_bits(gpu_addr);
        ib.ptr[4] = 0xDEADBEEF;
        ib.length_dw = 5;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err2;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                goto err2;
        }

        tmp = adev->wb.wb[index];
        if (tmp == 0xDEADBEEF)
                r = 0;
        else
                r = -EINVAL;

err2:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
err1:
        amdgpu_device_wb_free(adev, index);
        return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}

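/*
 * Parse the v2.1 RLC firmware header and record the versions, sizes and
 * payload pointers of the save/restore lists (cntl, gpm and srm).
 */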
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
        const struct rlc_firmware_header_v2_1 *rlc_hdr;

        rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
        adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
        adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
        adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
        adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
        adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
        adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
        adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
        adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
        adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
        adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
        adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
        adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
        adev->gfx.rlc.reg_list_format_direct_reg_list_length =
                        le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

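/*
 * Enable the ME/MEC firmware write-wait paths only when the loaded CP
 * firmware meets the per-ASIC minimum ucode and feature versions below.
 */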
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
        adev->gfx.me_fw_write_wait = false;
        adev->gfx.mec_fw_write_wait = false;

        switch (adev->asic_type) {
        case CHIP_VEGA10:
                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
                    (adev->gfx.me_feature_version >= 42) &&
                    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
                    (adev->gfx.pfp_feature_version >= 42))
                        adev->gfx.me_fw_write_wait = true;

                if ((adev->gfx.mec_fw_version >= 0x00000193) &&
                    (adev->gfx.mec_feature_version >= 42))
                        adev->gfx.mec_fw_write_wait = true;
                break;
        case CHIP_VEGA12:
                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
                    (adev->gfx.me_feature_version >= 44) &&
                    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
                    (adev->gfx.pfp_feature_version >= 44))
                        adev->gfx.me_fw_write_wait = true;

                if ((adev->gfx.mec_fw_version >= 0x00000196) &&
                    (adev->gfx.mec_feature_version >= 44))
                        adev->gfx.mec_fw_write_wait = true;
                break;
        case CHIP_VEGA20:
                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
                    (adev->gfx.me_feature_version >= 44) &&
                    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
                    (adev->gfx.pfp_feature_version >= 44))
                        adev->gfx.me_fw_write_wait = true;

                if ((adev->gfx.mec_fw_version >= 0x00000197) &&
                    (adev->gfx.mec_feature_version >= 44))
                        adev->gfx.mec_fw_write_wait = true;
                break;
        case CHIP_RAVEN:
                if ((adev->gfx.me_fw_version >= 0x0000009c) &&
                    (adev->gfx.me_feature_version >= 42) &&
                    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
                    (adev->gfx.pfp_feature_version >= 42))
                        adev->gfx.me_fw_write_wait = true;

                if ((adev->gfx.mec_fw_version >= 0x00000192) &&
                    (adev->gfx.mec_feature_version >= 42))
                        adev->gfx.mec_fw_write_wait = true;
                break;
        default:
                break;
        }
}

1027 {
1028         switch (adev->asic_type) {
1029         case CHIP_VEGA10:
1030         case CHIP_VEGA12:
1031         case CHIP_VEGA20:
1032                 break;
1033         case CHIP_RAVEN:
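                     /*
                      * On original Raven (rev_id < 0x8 and not Picasso,
                      * device 0x15d8), leave GFXOFF enabled only with
                      * known-good RLC firmware: version 106, or >= 531
                      * (excluding build 53815), with feature version >= 1 and
                      * an RLC v2.1 image; anything else clears PP_GFXOFF_MASK
                      * below.
                      */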
1034                 if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
1035                         && ((adev->gfx.rlc_fw_version != 106 &&
1036                              adev->gfx.rlc_fw_version < 531) ||
1037                             (adev->gfx.rlc_fw_version == 53815) ||
1038                             (adev->gfx.rlc_feature_version < 1) ||
1039                             !adev->gfx.rlc.is_rlc_v2_1))
1040                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1041
1042                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1043                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1044                                 AMD_PG_SUPPORT_CP |
1045                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1046                 break;
1047         default:
1048                 break;
1049         }
1050 }
1051
1052 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1053                                           const char *chip_name)
1054 {
1055         char fw_name[30];
1056         int err;
1057         struct amdgpu_firmware_info *info = NULL;
1058         const struct common_firmware_header *header = NULL;
1059         const struct gfx_firmware_header_v1_0 *cp_hdr;
1060
1061         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1062         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1063         if (err)
1064                 goto out;
1065         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1066         if (err)
1067                 goto out;
1068         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1069         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1070         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1071
1072         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1073         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1074         if (err)
1075                 goto out;
1076         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1077         if (err)
1078                 goto out;
1079         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1080         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1081         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1082
1083         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1084         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1085         if (err)
1086                 goto out;
1087         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1088         if (err)
1089                 goto out;
1090         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1091         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1092         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1093
1094         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1095                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1096                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1097                 info->fw = adev->gfx.pfp_fw;
1098                 header = (const struct common_firmware_header *)info->fw->data;
1099                 adev->firmware.fw_size +=
1100                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1101
1102                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1103                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1104                 info->fw = adev->gfx.me_fw;
1105                 header = (const struct common_firmware_header *)info->fw->data;
1106                 adev->firmware.fw_size +=
1107                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1108
1109                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1110                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1111                 info->fw = adev->gfx.ce_fw;
1112                 header = (const struct common_firmware_header *)info->fw->data;
1113                 adev->firmware.fw_size +=
1114                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1115         }
1116
1117 out:
1118         if (err) {
1119                 dev_err(adev->dev,
1120                         "gfx9: Failed to load firmware \"%s\"\n",
1121                         fw_name);
1122                 release_firmware(adev->gfx.pfp_fw);
1123                 adev->gfx.pfp_fw = NULL;
1124                 release_firmware(adev->gfx.me_fw);
1125                 adev->gfx.me_fw = NULL;
1126                 release_firmware(adev->gfx.ce_fw);
1127                 adev->gfx.ce_fw = NULL;
1128         }
1129         return err;
1130 }
1131
1132 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1133                                           const char *chip_name)
1134 {
1135         char fw_name[30];
1136         int err;
1137         struct amdgpu_firmware_info *info = NULL;
1138         const struct common_firmware_header *header = NULL;
1139         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1140         unsigned int *tmp = NULL;
1141         unsigned int i = 0;
1142         uint16_t version_major;
1143         uint16_t version_minor;
1144         uint32_t smu_version;
1145
1146         /*
1147          * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1148          * instead of picasso_rlc.bin.
1149          * Detection method:
1150          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1151          *          or revision >= 0xD8 && revision <= 0xDF
1152          * otherwise it is PCO FP5
1153          */
1154         if (!strcmp(chip_name, "picasso") &&
1155                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1156                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1157                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1158         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1159                 (smu_version >= 0x41e2b))
1160                 /*
1161                  * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1162                  */
1163                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1164         else
1165                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1166         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1167         if (err)
1168                 goto out;
1169         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
             if (err)
                     goto out;
1170         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1171
1172         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1173         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1174         if (version_major == 2 && version_minor == 1)
1175                 adev->gfx.rlc.is_rlc_v2_1 = true;
1176
1177         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1178         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1179         adev->gfx.rlc.save_and_restore_offset =
1180                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1181         adev->gfx.rlc.clear_state_descriptor_offset =
1182                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1183         adev->gfx.rlc.avail_scratch_ram_locations =
1184                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1185         adev->gfx.rlc.reg_restore_list_size =
1186                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1187         adev->gfx.rlc.reg_list_format_start =
1188                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1189         adev->gfx.rlc.reg_list_format_separate_start =
1190                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1191         adev->gfx.rlc.starting_offsets_start =
1192                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1193         adev->gfx.rlc.reg_list_format_size_bytes =
1194                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1195         adev->gfx.rlc.reg_list_size_bytes =
1196                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
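             /*
              * A single allocation holds both the register list format array
              * and the register restore list; register_restore points just
              * past the format entries copied in below.
              */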
1197         adev->gfx.rlc.register_list_format =
1198                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1199                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1200         if (!adev->gfx.rlc.register_list_format) {
1201                 err = -ENOMEM;
1202                 goto out;
1203         }
1204
1205         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1206                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1207         for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1208                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1209
1210         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1211
1212         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1213                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1214         for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1215                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1216
1217         if (adev->gfx.rlc.is_rlc_v2_1)
1218                 gfx_v9_0_init_rlc_ext_microcode(adev);
1219
1220         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1221                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1222                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1223                 info->fw = adev->gfx.rlc_fw;
1224                 header = (const struct common_firmware_header *)info->fw->data;
1225                 adev->firmware.fw_size +=
1226                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1227
1228                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1229                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1230                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1231                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1232                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1233                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1234                         info->fw = adev->gfx.rlc_fw;
1235                         adev->firmware.fw_size +=
1236                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1237
1238                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1239                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1240                         info->fw = adev->gfx.rlc_fw;
1241                         adev->firmware.fw_size +=
1242                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1243
1244                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1245                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1246                         info->fw = adev->gfx.rlc_fw;
1247                         adev->firmware.fw_size +=
1248                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1249                 }
1250         }
1251
1252 out:
1253         if (err) {
1254                 dev_err(adev->dev,
1255                         "gfx9: Failed to load firmware \"%s\"\n",
1256                         fw_name);
1257                 release_firmware(adev->gfx.rlc_fw);
1258                 adev->gfx.rlc_fw = NULL;
1259         }
1260         return err;
1261 }
1262
1263 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1264                                           const char *chip_name)
1265 {
1266         char fw_name[30];
1267         int err;
1268         struct amdgpu_firmware_info *info = NULL;
1269         const struct common_firmware_header *header = NULL;
1270         const struct gfx_firmware_header_v1_0 *cp_hdr;
1271
1272         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1273         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1274         if (err)
1275                 goto out;
1276         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1277         if (err)
1278                 goto out;
1279         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1280         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1281         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1282 
1284         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1285         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1286         if (!err) {
1287                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1288                 if (err)
1289                         goto out;
1290                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1291                                 adev->gfx.mec2_fw->data;
1292                 adev->gfx.mec2_fw_version =
1293                                 le32_to_cpu(cp_hdr->header.ucode_version);
1294                 adev->gfx.mec2_feature_version =
1295                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1296         } else {
1297                 err = 0;
1298                 adev->gfx.mec2_fw = NULL;
1299         }
1300
1301         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1302                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1303                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1304                 info->fw = adev->gfx.mec_fw;
1305                 header = (const struct common_firmware_header *)info->fw->data;
1306                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1307                 adev->firmware.fw_size +=
1308                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1309
1310                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1311                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1312                 info->fw = adev->gfx.mec_fw;
1313                 adev->firmware.fw_size +=
1314                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1315
1316                 if (adev->gfx.mec2_fw) {
1317                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1318                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1319                         info->fw = adev->gfx.mec2_fw;
1320                         header = (const struct common_firmware_header *)info->fw->data;
1321                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1322                         adev->firmware.fw_size +=
1323                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1324
1325                         /* TODO: Determine if MEC2 JT FW loading can be
1326                          * removed for all GFX v9 ASICs and above */
1327                         if (adev->asic_type != CHIP_ARCTURUS) {
1328                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1329                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1330                                 info->fw = adev->gfx.mec2_fw;
1331                                 adev->firmware.fw_size +=
1332                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1333                                         PAGE_SIZE);
1334                         }
1335                 }
1336         }
1337
1338 out:
1339         gfx_v9_0_check_if_need_gfxoff(adev);
1340         gfx_v9_0_check_fw_write_wait(adev);
1341         if (err) {
1342                 dev_err(adev->dev,
1343                         "gfx9: Failed to load firmware \"%s\"\n",
1344                         fw_name);
1345                 release_firmware(adev->gfx.mec_fw);
1346                 adev->gfx.mec_fw = NULL;
1347                 release_firmware(adev->gfx.mec2_fw);
1348                 adev->gfx.mec2_fw = NULL;
1349         }
1350         return err;
1351 }
1352
1353 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1354 {
1355         const char *chip_name;
1356         int r;
1357
1358         DRM_DEBUG("\n");
1359
1360         switch (adev->asic_type) {
1361         case CHIP_VEGA10:
1362                 chip_name = "vega10";
1363                 break;
1364         case CHIP_VEGA12:
1365                 chip_name = "vega12";
1366                 break;
1367         case CHIP_VEGA20:
1368                 chip_name = "vega20";
1369                 break;
1370         case CHIP_RAVEN:
1371                 if (adev->rev_id >= 8)
1372                         chip_name = "raven2";
1373                 else if (adev->pdev->device == 0x15d8)
1374                         chip_name = "picasso";
1375                 else
1376                         chip_name = "raven";
1377                 break;
1378         case CHIP_ARCTURUS:
1379                 chip_name = "arcturus";
1380                 break;
1381         case CHIP_RENOIR:
1382                 chip_name = "renoir";
1383                 break;
1384         default:
1385                 BUG();
1386         }
1387
1388         /* No CPG in Arcturus */
1389         if (adev->asic_type != CHIP_ARCTURUS) {
1390                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1391                 if (r)
1392                         return r;
1393         }
1394
1395         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1396         if (r)
1397                 return r;
1398
1399         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1400         if (r)
1401                 return r;
1402
1403         return r;
1404 }
1405
1406 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1407 {
1408         u32 count = 0;
1409         const struct cs_section_def *sect = NULL;
1410         const struct cs_extent_def *ext = NULL;
1411
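             /*
              * The dword accounting below mirrors the packets emitted by
              * gfx_v9_0_get_csb_buffer(): 2 dwords for the PREAMBLE_CNTL that
              * begins clear state, 3 for CONTEXT_CONTROL, 2 + reg_count for
              * each SET_CONTEXT_REG extent, then 2 for the PREAMBLE_CNTL that
              * ends clear state and 2 for the CLEAR_STATE packet itself.
              */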
1412         /* begin clear state */
1413         count += 2;
1414         /* context control state */
1415         count += 3;
1416
1417         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1418                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1419                         if (sect->id == SECT_CONTEXT)
1420                                 count += 2 + ext->reg_count;
1421                         else
1422                                 return 0;
1423                 }
1424         }
1425
1426         /* end clear state */
1427         count += 2;
1428         /* clear state */
1429         count += 2;
1430
1431         return count;
1432 }
1433
1434 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1435                                     volatile u32 *buffer)
1436 {
1437         u32 count = 0, i;
1438         const struct cs_section_def *sect = NULL;
1439         const struct cs_extent_def *ext = NULL;
1440
1441         if (adev->gfx.rlc.cs_data == NULL)
1442                 return;
1443         if (buffer == NULL)
1444                 return;
1445
1446         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1447         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1448
1449         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1450         buffer[count++] = cpu_to_le32(0x80000000);
1451         buffer[count++] = cpu_to_le32(0x80000000);
1452
1453         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1454                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1455                         if (sect->id == SECT_CONTEXT) {
1456                                 buffer[count++] =
1457                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1458                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1459                                                 PACKET3_SET_CONTEXT_REG_START);
1460                                 for (i = 0; i < ext->reg_count; i++)
1461                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1462                         } else {
1463                                 return;
1464                         }
1465                 }
1466         }
1467
1468         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1469         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1470
1471         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1472         buffer[count++] = cpu_to_le32(0);
1473 }
1474
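     /*
      * For each SE/SH, mark the first always_on_cu_num active CUs (4 on APUs,
      * 8 on Vega12, 12 otherwise) as always-on in RLC_LB_ALWAYS_ACTIVE_CU_MASK,
      * and additionally latch the first pg_always_on_cu_num (2) of them into
      * RLC_PG_ALWAYS_ON_CU_MASK.
      */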
1475 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1476 {
1477         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1478         uint32_t pg_always_on_cu_num = 2;
1479         uint32_t always_on_cu_num;
1480         uint32_t i, j, k;
1481         uint32_t mask, cu_bitmap, counter;
1482
1483         if (adev->flags & AMD_IS_APU)
1484                 always_on_cu_num = 4;
1485         else if (adev->asic_type == CHIP_VEGA12)
1486                 always_on_cu_num = 8;
1487         else
1488                 always_on_cu_num = 12;
1489
1490         mutex_lock(&adev->grbm_idx_mutex);
1491         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1492                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1493                         mask = 1;
1494                         cu_bitmap = 0;
1495                         counter = 0;
1496                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1497
1498                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1499                                 if (cu_info->bitmap[i][j] & mask) {
1500                                         if (counter == pg_always_on_cu_num)
1501                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1502                                         if (counter < always_on_cu_num)
1503                                                 cu_bitmap |= mask;
1504                                         else
1505                                                 break;
1506                                         counter++;
1507                                 }
1508                                 mask <<= 1;
1509                         }
1510
1511                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1512                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1513                 }
1514         }
1515         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1516         mutex_unlock(&adev->grbm_idx_mutex);
1517 }
1518
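     /*
      * Program the RLC load-balancing (LBPW) thresholds and counters for
      * Raven; the register values below are assumed to be the recommended
      * settings for this ASIC.
      */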
1519 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1520 {
1521         uint32_t data;
1522
1523         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1524         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1525         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1526         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1527         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1528
1529         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1530         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1531
1532         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1533         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1534
1535         mutex_lock(&adev->grbm_idx_mutex);
1536         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1537         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1538         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1539
1540         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1541         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1542         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1543         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1544         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1545
1546         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1547         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1548         data &= 0x0000FFFF;
1549         data |= 0x00C00000;
1550         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1551
1552         /*
1553          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1554          * programmed in gfx_v9_0_init_always_on_cu_mask()
1555          */
1556
1557         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1558          * but used for RLC_LB_CNTL configuration */
1559         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1560         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1561         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1562         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1563         mutex_unlock(&adev->grbm_idx_mutex);
1564
1565         gfx_v9_0_init_always_on_cu_mask(adev);
1566 }
1567
1568 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1569 {
1570         uint32_t data;
1571
1572         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1573         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1574         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1575         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1576         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1577
1578         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1579         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1580
1581         /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1582         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1583
1584         mutex_lock(&adev->grbm_idx_mutex);
1585         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1586         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1587         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1588
1589         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1590         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1591         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1592         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1593         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1594
1595         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1596         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1597         data &= 0x0000FFFF;
1598         data |= 0x00C00000;
1599         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1600
1601         /*
1602          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1603          * programmed in gfx_v9_0_init_always_on_cu_mask()
1604          */
1605
1606         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1607          * but used for RLC_LB_CNTL configuration */
1608         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1609         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1610         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1611         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1612         mutex_unlock(&adev->grbm_idx_mutex);
1613
1614         gfx_v9_0_init_always_on_cu_mask(adev);
1615 }
1616
1617 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1618 {
1619         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1620 }
1621
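     /*
      * Number of CP jump tables carried in the RLC cp_table; presumably one
      * each for PFP, ME, CE, MEC1 and MEC2.
      */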
1622 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1623 {
1624         return 5;
1625 }
1626
1627 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1628 {
1629         const struct cs_section_def *cs_data;
1630         int r;
1631
1632         adev->gfx.rlc.cs_data = gfx9_cs_data;
1633
1634         cs_data = adev->gfx.rlc.cs_data;
1635
1636         if (cs_data) {
1637                 /* init clear state block */
1638                 r = amdgpu_gfx_rlc_init_csb(adev);
1639                 if (r)
1640                         return r;
1641         }
1642
1643         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1644                 /* TODO: double check the cp_table_size for RV */
1645                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1646                 r = amdgpu_gfx_rlc_init_cpt(adev);
1647                 if (r)
1648                         return r;
1649         }
1650
1651         switch (adev->asic_type) {
1652         case CHIP_RAVEN:
1653                 gfx_v9_0_init_lbpw(adev);
1654                 break;
1655         case CHIP_VEGA20:
1656                 gfx_v9_4_init_lbpw(adev);
1657                 break;
1658         default:
1659                 break;
1660         }
1661
1662         return 0;
1663 }
1664
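     /*
      * Pin the clear state BO in VRAM so its GPU address stays fixed while
      * the CP references it; the pinned address is cached in
      * clear_state_gpu_addr.
      */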
1665 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1666 {
1667         int r;
1668
1669         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1670         if (unlikely(r != 0))
1671                 return r;
1672
1673         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1674                         AMDGPU_GEM_DOMAIN_VRAM);
1675         if (!r)
1676                 adev->gfx.rlc.clear_state_gpu_addr =
1677                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1678
1679         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1680
1681         return r;
1682 }
1683
1684 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1685 {
1686         int r;
1687
1688         if (!adev->gfx.rlc.clear_state_obj)
1689                 return;
1690
1691         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1692         if (likely(r == 0)) {
1693                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1694                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1695         }
1696 }
1697
1698 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1699 {
1700         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1701         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1702 }
1703
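     /*
      * MEC setup: take ownership of the compute queues, allocate the per-ring
      * EOP buffers (GFX9_MEC_HPD_SIZE bytes each) in VRAM, and copy the MEC
      * ucode into a GTT BO for the CP to fetch.
      */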
1704 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1705 {
1706         int r;
1707         u32 *hpd;
1708         const __le32 *fw_data;
1709         unsigned fw_size;
1710         u32 *fw;
1711         size_t mec_hpd_size;
1712
1713         const struct gfx_firmware_header_v1_0 *mec_hdr;
1714
1715         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1716
1717         /* take ownership of the relevant compute queues */
1718         amdgpu_gfx_compute_queue_acquire(adev);
1719         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1720
1721         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1722                                       AMDGPU_GEM_DOMAIN_VRAM,
1723                                       &adev->gfx.mec.hpd_eop_obj,
1724                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1725                                       (void **)&hpd);
1726         if (r) {
1727                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1728                 gfx_v9_0_mec_fini(adev);
1729                 return r;
1730         }
1731
1732         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1733
1734         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1735         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1736
1737         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1738
1739         fw_data = (const __le32 *)
1740                 (adev->gfx.mec_fw->data +
1741                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1742         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1743
1744         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1745                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1746                                       &adev->gfx.mec.mec_fw_obj,
1747                                       &adev->gfx.mec.mec_fw_gpu_addr,
1748                                       (void **)&fw);
1749         if (r) {
1750                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1751                 gfx_v9_0_mec_fini(adev);
1752                 return r;
1753         }
1754
1755         memcpy(fw, fw_data, fw_size);
1756
1757         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1758         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1759
1760         return 0;
1761 }
1762
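     /*
      * SQ indirect register access: program SQ_IND_INDEX with the target
      * wave, SIMD and register address (FORCE_READ set, plus AUTO_INCR when
      * dumping a range), then read the value(s) back through SQ_IND_DATA.
      */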
1763 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1764 {
1765         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1766                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1767                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1768                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1769                 (SQ_IND_INDEX__FORCE_READ_MASK));
1770         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1771 }
1772
1773 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1774                            uint32_t wave, uint32_t thread,
1775                            uint32_t regno, uint32_t num, uint32_t *out)
1776 {
1777         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1778                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1779                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1780                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1781                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1782                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1783                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1784         while (num--)
1785                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1786 }
1787
1788 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1789 {
1790         /* type 1 wave data */
1791         dst[(*no_fields)++] = 1;
1792         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1793         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1794         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1795         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1796         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1797         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1798         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1799         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1800         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1801         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1802         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1803         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1804         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1805         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1806 }
1807
1808 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1809                                      uint32_t wave, uint32_t start,
1810                                      uint32_t size, uint32_t *dst)
1811 {
1812         wave_read_regs(
1813                 adev, simd, wave, 0,
1814                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1815 }
1816
1817 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1818                                      uint32_t wave, uint32_t thread,
1819                                      uint32_t start, uint32_t size,
1820                                      uint32_t *dst)
1821 {
1822         wave_read_regs(
1823                 adev, simd, wave, thread,
1824                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1825 }
1826
1827 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1828                                   u32 me, u32 pipe, u32 q, u32 vm)
1829 {
1830         soc15_grbm_select(adev, me, pipe, q, vm);
1831 }
1832
1833 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1834         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1835         .select_se_sh = &gfx_v9_0_select_se_sh,
1836         .read_wave_data = &gfx_v9_0_read_wave_data,
1837         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1838         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1839         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1840         .ras_error_inject = &gfx_v9_0_ras_error_inject,
1841         .query_ras_error_count = &gfx_v9_0_query_ras_error_count
1842 };
1843
1844 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1845 {
1846         u32 gb_addr_config;
1847         int err;
1848
1849         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1850
1851         switch (adev->asic_type) {
1852         case CHIP_VEGA10:
1853                 adev->gfx.config.max_hw_contexts = 8;
1854                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1855                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1856                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1857                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1858                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1859                 break;
1860         case CHIP_VEGA12:
1861                 adev->gfx.config.max_hw_contexts = 8;
1862                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1863                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1864                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1865                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1866                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1867                 DRM_INFO("fix gfx.config for vega12\n");
1868                 break;
1869         case CHIP_VEGA20:
1870                 adev->gfx.config.max_hw_contexts = 8;
1871                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1872                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1873                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1874                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1875                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1876                 gb_addr_config &= ~0xf3e777ff;
1877                 gb_addr_config |= 0x22014042;
1878                 /* check vbios table if gpu info is not available */
1879                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1880                 if (err)
1881                         return err;
1882                 break;
1883         case CHIP_RAVEN:
1884                 adev->gfx.config.max_hw_contexts = 8;
1885                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1886                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1887                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1888                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1889                 if (adev->rev_id >= 8)
1890                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1891                 else
1892                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1893                 break;
1894         case CHIP_ARCTURUS:
1895                 adev->gfx.config.max_hw_contexts = 8;
1896                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1897                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1898                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1899                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1900                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1901                 gb_addr_config &= ~0xf3e777ff;
1902                 gb_addr_config |= 0x22014042;
1903                 break;
1904         case CHIP_RENOIR:
1905                 adev->gfx.config.max_hw_contexts = 8;
1906                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1907                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1908                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1909                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1910                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1911                 gb_addr_config &= ~0xf3e777ff;
1912                 gb_addr_config |= 0x22010042;
1913                 break;
1914         default:
1915                 BUG();
1916                 break;
1917         }
1918
1919         adev->gfx.config.gb_addr_config = gb_addr_config;
1920
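             /*
              * The GB_ADDR_CONFIG fields decoded below are log2 encoded,
              * hence the 1 << REG_GET_FIELD() pattern; PIPE_INTERLEAVE_SIZE
              * is additionally biased by 256 bytes (1 << (8 + field)).
              */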
1921         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1922                         REG_GET_FIELD(
1923                                         adev->gfx.config.gb_addr_config,
1924                                         GB_ADDR_CONFIG,
1925                                         NUM_PIPES);
1926
1927         adev->gfx.config.max_tile_pipes =
1928                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1929
1930         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1931                         REG_GET_FIELD(
1932                                         adev->gfx.config.gb_addr_config,
1933                                         GB_ADDR_CONFIG,
1934                                         NUM_BANKS);
1935         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1936                         REG_GET_FIELD(
1937                                         adev->gfx.config.gb_addr_config,
1938                                         GB_ADDR_CONFIG,
1939                                         MAX_COMPRESSED_FRAGS);
1940         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1941                         REG_GET_FIELD(
1942                                         adev->gfx.config.gb_addr_config,
1943                                         GB_ADDR_CONFIG,
1944                                         NUM_RB_PER_SE);
1945         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1946                         REG_GET_FIELD(
1947                                         adev->gfx.config.gb_addr_config,
1948                                         GB_ADDR_CONFIG,
1949                                         NUM_SHADER_ENGINES);
1950         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1951                         REG_GET_FIELD(
1952                                         adev->gfx.config.gb_addr_config,
1953                                         GB_ADDR_CONFIG,
1954                                         PIPE_INTERLEAVE_SIZE));
1955
1956         return 0;
1957 }
1958
1959 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1960                                    struct amdgpu_ngg_buf *ngg_buf,
1961                                    int size_se,
1962                                    int default_size_se)
1963 {
1964         int r;
1965
1966         if (size_se < 0) {
1967                 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1968                 return -EINVAL;
1969         }
1970         size_se = size_se ? size_se : default_size_se;
1971
1972         ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1973         r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1974                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1975                                     &ngg_buf->bo,
1976                                     &ngg_buf->gpu_addr,
1977                                     NULL);
1978         if (r) {
1979                 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1980                 return r;
1981         }
1982         ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1983
1984         return r;
1985 }
1986
1987 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1988 {
1989         int i;
1990
1991         for (i = 0; i < NGG_BUF_MAX; i++)
1992                 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1993                                       &adev->gfx.ngg.buf[i].gpu_addr,
1994                                       NULL);
1995
1996         memset(&adev->gfx.ngg.buf[0], 0,
1997                         sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1998
1999         adev->gfx.ngg.init = false;
2000
2001         return 0;
2002 }
2003
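     /*
      * NGG setup: reserve a small chunk of GDS and create the per-SE
      * primitive, position and control-sideband buffers (plus an optional
      * parameter cache), using the amdgpu_*_buf_per_se module parameters when
      * non-zero and the defaults below otherwise.
      */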
2004 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
2005 {
2006         int r;
2007
2008         if (!amdgpu_ngg || adev->gfx.ngg.init)
2009                 return 0;
2010
2011         /* GDS reserve memory: 64 bytes alignment */
2012         adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
2013         adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
2014         adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
2015         adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
2016
2017         /* Primitive Buffer */
2018         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
2019                                     amdgpu_prim_buf_per_se,
2020                                     64 * 1024);
2021         if (r) {
2022                 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
2023                 goto err;
2024         }
2025
2026         /* Position Buffer */
2027         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
2028                                     amdgpu_pos_buf_per_se,
2029                                     256 * 1024);
2030         if (r) {
2031                 dev_err(adev->dev, "Failed to create Position Buffer\n");
2032                 goto err;
2033         }
2034
2035         /* Control Sideband */
2036         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
2037                                     amdgpu_cntl_sb_buf_per_se,
2038                                     256);
2039         if (r) {
2040                 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
2041                 goto err;
2042         }
2043
2044         /* Parameter Cache, not created by default */
2045         if (amdgpu_param_buf_per_se <= 0)
2046                 goto out;
2047
2048         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
2049                                     amdgpu_param_buf_per_se,
2050                                     512 * 1024);
2051         if (r) {
2052                 dev_err(adev->dev, "Failed to create Parameter Cache\n");
2053                 goto err;
2054         }
2055
2056 out:
2057         adev->gfx.ngg.init = true;
2058         return 0;
2059 err:
2060         gfx_v9_0_ngg_fini(adev);
2061         return r;
2062 }
2063
2064 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
2065 {
2066         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2067         int r;
2068         u32 data, base;
2069
2070         if (!amdgpu_ngg)
2071                 return 0;
2072
2073         /* Program buffer size */
2074         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
2075                              adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
2076         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
2077                              adev->gfx.ngg.buf[NGG_POS].size >> 8);
2078         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
2079
2080         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
2081                              adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
2082         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
2083                              adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
2084         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
2085
2086         /* Program buffer base address */
2087         base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2088         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
2089         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
2090
2091         base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2092         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
2093         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
2094
2095         base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2096         data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
2097         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
2098
2099         base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2100         data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
2101         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
2102
2103         base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2104         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
2105         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
2106
2107         base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2108         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
2109         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
2110
2111         /* Clear GDS reserved memory */
2112         r = amdgpu_ring_alloc(ring, 17);
2113         if (r) {
2114                 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
2115                           ring->name, r);
2116                 return r;
2117         }
2118
2119         gfx_v9_0_write_data_to_reg(ring, 0, false,
2120                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
2121                                    (adev->gds.gds_size +
2122                                     adev->gfx.ngg.gds_reserve_size));
2123
2124         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
2125         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
2126                                 PACKET3_DMA_DATA_DST_SEL(1) |
2127                                 PACKET3_DMA_DATA_SRC_SEL(2)));
2128         amdgpu_ring_write(ring, 0);
2129         amdgpu_ring_write(ring, 0);
2130         amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
2131         amdgpu_ring_write(ring, 0);
2132         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
2133                                 adev->gfx.ngg.gds_reserve_size);
2134
2135         gfx_v9_0_write_data_to_reg(ring, 0, false,
2136                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
2137
2138         amdgpu_ring_commit(ring);
2139
2140         return 0;
2141 }
2142
2143 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2144                                       int mec, int pipe, int queue)
2145 {
2146         int r;
2147         unsigned irq_type;
2148         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2149 
2152         /* mec0 is me1 */
2153         ring->me = mec + 1;
2154         ring->pipe = pipe;
2155         ring->queue = queue;
2156
2157         ring->ring_obj = NULL;
2158         ring->use_doorbell = true;
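             /*
              * adev->doorbell_index.* assignments appear to be in 64-bit
              * doorbell slots while ring->doorbell_index is in 32-bit dword
              * units, hence the shift by one below.
              */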
2159         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2160         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2161                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2162         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2163
2164         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2165                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2166                 + ring->pipe;
2167
2168         /* type-2 packets are deprecated on MEC, use type-3 instead */
2169         r = amdgpu_ring_init(adev, ring, 1024,
2170                              &adev->gfx.eop_irq, irq_type);
2171         if (r)
2172                 return r;
2173 
2175         return 0;
2176 }
2177
2178 static int gfx_v9_0_sw_init(void *handle)
2179 {
2180         int i, j, k, r, ring_id;
2181         struct amdgpu_ring *ring;
2182         struct amdgpu_kiq *kiq;
2183         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2184
2185         switch (adev->asic_type) {
2186         case CHIP_VEGA10:
2187         case CHIP_VEGA12:
2188         case CHIP_VEGA20:
2189         case CHIP_RAVEN:
2190         case CHIP_ARCTURUS:
2191         case CHIP_RENOIR:
2192                 adev->gfx.mec.num_mec = 2;
2193                 break;
2194         default:
2195                 adev->gfx.mec.num_mec = 1;
2196                 break;
2197         }
2198
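             /*
              * Compute queue topology: num_mec MECs x 4 pipes x 8 queues
              * gives the hardware queue pool that the compute rings (and the
              * KIQ) are allocated from below.
              */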
2199         adev->gfx.mec.num_pipe_per_mec = 4;
2200         adev->gfx.mec.num_queue_per_pipe = 8;
2201
2202         /* EOP Event */
2203         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2204         if (r)
2205                 return r;
2206
2207         /* Privileged reg */
2208         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2209                               &adev->gfx.priv_reg_irq);
2210         if (r)
2211                 return r;
2212
2213         /* Privileged inst */
2214         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2215                               &adev->gfx.priv_inst_irq);
2216         if (r)
2217                 return r;
2218
2219         /* ECC error */
2220         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2221                               &adev->gfx.cp_ecc_error_irq);
2222         if (r)
2223                 return r;
2224
2225         /* FUE error */
2226         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2227                               &adev->gfx.cp_ecc_error_irq);
2228         if (r)
2229                 return r;
2230
2231         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2232
2233         gfx_v9_0_scratch_init(adev);
2234
2235         r = gfx_v9_0_init_microcode(adev);
2236         if (r) {
2237                 DRM_ERROR("Failed to load gfx firmware!\n");
2238                 return r;
2239         }
2240
2241         r = adev->gfx.rlc.funcs->init(adev);
2242         if (r) {
2243                 DRM_ERROR("Failed to init rlc BOs!\n");
2244                 return r;
2245         }
2246
2247         r = gfx_v9_0_mec_init(adev);
2248         if (r) {
2249                 DRM_ERROR("Failed to init MEC BOs!\n");
2250                 return r;
2251         }
2252
2253         /* set up the gfx ring */
2254         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2255                 ring = &adev->gfx.gfx_ring[i];
2256                 ring->ring_obj = NULL;
2257                 if (!i)
2258                         sprintf(ring->name, "gfx");
2259                 else
2260                         sprintf(ring->name, "gfx_%d", i);
2261                 ring->use_doorbell = true;
2262                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2263                 r = amdgpu_ring_init(adev, ring, 1024,
2264                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2265                 if (r)
2266                         return r;
2267         }
2268
2269         /* set up the compute queues - allocate horizontally across pipes */
2270         ring_id = 0;
2271         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2272                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2273                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2274                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2275                                         continue;
2276
2277                                 r = gfx_v9_0_compute_ring_init(adev,
2278                                                                ring_id,
2279                                                                i, k, j);
2280                                 if (r)
2281                                         return r;
2282
2283                                 ring_id++;
2284                         }
2285                 }
2286         }
2287
2288         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2289         if (r) {
2290                 DRM_ERROR("Failed to init KIQ BOs!\n");
2291                 return r;
2292         }
2293
2294         kiq = &adev->gfx.kiq;
2295         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2296         if (r)
2297                 return r;
2298
2299         /* create MQD for all compute queues as well as KIQ for the SRIOV case */
2300         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2301         if (r)
2302                 return r;
2303
2304         adev->gfx.ce_ram_size = 0x8000;
2305
2306         r = gfx_v9_0_gpu_early_init(adev);
2307         if (r)
2308                 return r;
2309
2310         r = gfx_v9_0_ngg_init(adev);
2311         if (r)
2312                 return r;
2313
2314         return 0;
2315 }
2316
2317
2318 static int gfx_v9_0_sw_fini(void *handle)
2319 {
2320         int i;
2321         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2322
2323         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
2324                         adev->gfx.ras_if) {
2325                 struct ras_common_if *ras_if = adev->gfx.ras_if;
2326                 struct ras_ih_if ih_info = {
2327                         .head = *ras_if,
2328                 };
2329
2330                 amdgpu_ras_debugfs_remove(adev, ras_if);
2331                 amdgpu_ras_sysfs_remove(adev, ras_if);
2332                 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
2333                 amdgpu_ras_feature_enable(adev, ras_if, 0);
2334                 kfree(ras_if);
2335         }
2336
2337         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2338                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2339         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2340                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2341
2342         amdgpu_gfx_mqd_sw_fini(adev);
2343         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2344         amdgpu_gfx_kiq_fini(adev);
2345
2346         gfx_v9_0_mec_fini(adev);
2347         gfx_v9_0_ngg_fini(adev);
2348         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2349         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2350                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2351                                 &adev->gfx.rlc.cp_table_gpu_addr,
2352                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2353         }
2354         gfx_v9_0_free_microcode(adev);
2355
2356         return 0;
2357 }
2358
2359
2360 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2361 {
2362         /* TODO */
2363 }
2364
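/*
 * Select which SE/SH/instance subsequent GRBM-indexed register accesses
 * apply to by programming GRBM_GFX_INDEX.  Passing 0xffffffff for any
 * argument selects broadcast mode for that field instead of a single index.
 */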
2365 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2366 {
2367         u32 data;
2368
2369         if (instance == 0xffffffff)
2370                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2371         else
2372                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2373
2374         if (se_num == 0xffffffff)
2375                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2376         else
2377                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2378
2379         if (sh_num == 0xffffffff)
2380                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2381         else
2382                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2383
2384         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2385 }
2386
2387 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2388 {
2389         u32 data, mask;
2390
2391         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2392         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2393
2394         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2395         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2396
2397         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2398                                          adev->gfx.config.max_sh_per_se);
2399
2400         return (~data) & mask;
2401 }
2402
2403 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2404 {
2405         int i, j;
2406         u32 data;
2407         u32 active_rbs = 0;
2408         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2409                                         adev->gfx.config.max_sh_per_se;
2410
2411         mutex_lock(&adev->grbm_idx_mutex);
2412         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2413                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2414                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2415                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2416                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2417                                                rb_bitmap_width_per_sh);
2418                 }
2419         }
2420         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2421         mutex_unlock(&adev->grbm_idx_mutex);
2422
2423         adev->gfx.config.backend_enable_mask = active_rbs;
2424         adev->gfx.config.num_rbs = hweight32(active_rbs);
2425 }
2426
2427 #define DEFAULT_SH_MEM_BASES    (0x6000)
2428 #define FIRST_COMPUTE_VMID      (8)
2429 #define LAST_COMPUTE_VMID       (16)
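/*
 * Set up the compute VMIDs (8..15): point their LDS/scratch/GPUVM apertures
 * at the region described in the comment below via a non-zero SH_MEM_BASES,
 * and clear their GDS/GWS/OA allocations.
 */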
2430 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2431 {
2432         int i;
2433         uint32_t sh_mem_config;
2434         uint32_t sh_mem_bases;
2435
2436         /*
2437          * Configure apertures:
2438          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2439          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2440          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (256TB)
2441          */
2442         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2443
2444         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2445                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2446                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2447
2448         mutex_lock(&adev->srbm_mutex);
2449         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2450                 soc15_grbm_select(adev, 0, 0, 0, i);
2451                 /* CP and shaders */
2452                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2453                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2454         }
2455         soc15_grbm_select(adev, 0, 0, 0, 0);
2456         mutex_unlock(&adev->srbm_mutex);
2457
2458         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2459          * access. These should be enabled by FW for target VMIDs. */
2460         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2461                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2462                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2463                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2464                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2465         }
2466 }
2467
2468 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2469 {
2470         int vmid;
2471
2472         /*
2473          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2474          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2475          * the driver can enable them for graphics. VMID0 should maintain
2476          * access so that HWS firmware can save/restore entries.
2477          */
2478         for (vmid = 1; vmid < 16; vmid++) {
2479                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2480                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2481                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2482                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2483         }
2484 }
2485
2486 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2487 {
2488         u32 tmp;
2489         int i;
2490
2491         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2492
2493         gfx_v9_0_tiling_mode_table_init(adev);
2494
2495         gfx_v9_0_setup_rb(adev);
2496         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2497         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2498
2499         /* XXX SH_MEM regs */
2500         /* where to put LDS, scratch, GPUVM in FSA64 space */
2501         mutex_lock(&adev->srbm_mutex);
2502         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2503                 soc15_grbm_select(adev, 0, 0, 0, i);
2504                 /* CP and shaders */
2505                 if (i == 0) {
2506                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2507                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2508                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2509                                             !!amdgpu_noretry);
2510                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2511                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2512                 } else {
2513                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2514                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2515                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2516                                             !!amdgpu_noretry);
2517                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2518                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2519                                 (adev->gmc.private_aperture_start >> 48));
2520                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2521                                 (adev->gmc.shared_aperture_start >> 48));
2522                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2523                 }
2524         }
2525         soc15_grbm_select(adev, 0, 0, 0, 0);
2526
2527         mutex_unlock(&adev->srbm_mutex);
2528
2529         gfx_v9_0_init_compute_vmid(adev);
2530         gfx_v9_0_init_gds_vmid(adev);
2531 }
2532
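/*
 * Poll the RLC serdes busy bits for every SE/SH combination and then for the
 * non-CU masters, giving up after adev->usec_timeout microseconds per unit.
 */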
2533 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2534 {
2535         u32 i, j, k;
2536         u32 mask;
2537
2538         mutex_lock(&adev->grbm_idx_mutex);
2539         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2540                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2541                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2542                         for (k = 0; k < adev->usec_timeout; k++) {
2543                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2544                                         break;
2545                                 udelay(1);
2546                         }
2547                         if (k == adev->usec_timeout) {
2548                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2549                                                       0xffffffff, 0xffffffff);
2550                                 mutex_unlock(&adev->grbm_idx_mutex);
2551                                 DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2552                                          i, j);
2553                                 return;
2554                         }
2555                 }
2556         }
2557         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2558         mutex_unlock(&adev->grbm_idx_mutex);
2559
2560         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2561                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2562                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2563                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2564         for (k = 0; k < adev->usec_timeout; k++) {
2565                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2566                         break;
2567                 udelay(1);
2568         }
2569 }
2570
2571 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2572                                                bool enable)
2573 {
2574         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2575
2576         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2577         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2578         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2579         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2580
2581         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2582 }
2583
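/* Program the clear state buffer (CSIB) address and size into the RLC. */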
2584 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2585 {
2586         /* csib */
2587         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2588                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2589         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2590                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2591         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2592                         adev->gfx.rlc.clear_state_size);
2593 }
2594
2595 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2596                                 int indirect_offset,
2597                                 int list_size,
2598                                 int *unique_indirect_regs,
2599                                 int unique_indirect_reg_count,
2600                                 int *indirect_start_offsets,
2601                                 int *indirect_start_offsets_count,
2602                                 int max_start_offsets_count)
2603 {
2604         int idx;
2605
2606         for (; indirect_offset < list_size; indirect_offset++) {
2607                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2608                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2609                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2610
2611                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2612                         indirect_offset += 2;
2613
2614                         /* look for the matching index */
2615                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2616                                 if (unique_indirect_regs[idx] ==
2617                                         register_list_format[indirect_offset] ||
2618                                         !unique_indirect_regs[idx])
2619                                         break;
2620                         }
2621
2622                         BUG_ON(idx >= unique_indirect_reg_count);
2623
2624                         if (!unique_indirect_regs[idx])
2625                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2626
2627                         indirect_offset++;
2628                 }
2629         }
2630 }
2631
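/*
 * Build the RLC save/restore list from the register list format blob shipped
 * with the RLC firmware: the direct portion is copied as-is, the indirect
 * portion is rewritten to reference the table of unique indirect registers,
 * and the list size plus per-block start offsets are written to the RLC
 * scratch RAM.
 */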
2632 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2633 {
2634         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2635         int unique_indirect_reg_count = 0;
2636
2637         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2638         int indirect_start_offsets_count = 0;
2639
2640         int list_size = 0;
2641         int i = 0, j = 0;
2642         u32 tmp = 0;
2643
2644         u32 *register_list_format =
2645                 kmemdup(adev->gfx.rlc.register_list_format,
2646                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2647         if (!register_list_format)
2648                 return -ENOMEM;
2649
2650         /* setup unique_indirect_regs array and indirect_start_offsets array */
2651         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2652         gfx_v9_1_parse_ind_reg_list(register_list_format,
2653                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2654                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2655                                     unique_indirect_regs,
2656                                     unique_indirect_reg_count,
2657                                     indirect_start_offsets,
2658                                     &indirect_start_offsets_count,
2659                                     ARRAY_SIZE(indirect_start_offsets));
2660
2661         /* enable auto inc in case it is disabled */
2662         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2663         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2664         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2665
2666         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2667         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2668                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2669         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2670                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2671                         adev->gfx.rlc.register_restore[i]);
2672
2673         /* load indirect register */
2674         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2675                 adev->gfx.rlc.reg_list_format_start);
2676
2677         /* direct register portion */
2678         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2679                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2680                         register_list_format[i]);
2681
2682         /* indirect register portion */
2683         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2684                 if (register_list_format[i] == 0xFFFFFFFF) {
2685                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2686                         continue;
2687                 }
2688
2689                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2690                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2691
2692                 for (j = 0; j < unique_indirect_reg_count; j++) {
2693                         if (register_list_format[i] == unique_indirect_regs[j]) {
2694                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2695                                 break;
2696                         }
2697                 }
2698
2699                 BUG_ON(j >= unique_indirect_reg_count);
2700
2701                 i++;
2702         }
2703
2704         /* set save/restore list size */
2705         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2706         list_size = list_size >> 1;
2707         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2708                 adev->gfx.rlc.reg_restore_list_size);
2709         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2710
2711         /* write the starting offsets to RLC scratch ram */
2712         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2713                 adev->gfx.rlc.starting_offsets_start);
2714         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2715                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2716                        indirect_start_offsets[i]);
2717
2718         /* load unique indirect regs */
2719         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2720                 if (unique_indirect_regs[i] != 0) {
2721                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2722                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2723                                unique_indirect_regs[i] & 0x3FFFF);
2724
2725                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2726                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2727                                unique_indirect_regs[i] >> 20);
2728                 }
2729         }
2730
2731         kfree(register_list_format);
2732         return 0;
2733 }
2734
2735 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2736 {
2737         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2738 }
2739
2740 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2741                                              bool enable)
2742 {
2743         uint32_t data = 0;
2744         uint32_t default_data = 0;
2745
2746         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2747         if (enable) {
2748                 /* enable GFXIP control over CGPG */
2749                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2750                 if (default_data != data)
2751                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2752
2753                 /* update status */
2754                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2755                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2756                 if (default_data != data)
2757                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2758         } else {
2759                 /* restore GFXIP control over CGPG */
2760                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2761                 if (default_data != data)
2762                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2762                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2763         }
2764 }
2765
2766 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2767 {
2768         uint32_t data = 0;
2769
2770         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2771                               AMD_PG_SUPPORT_GFX_SMG |
2772                               AMD_PG_SUPPORT_GFX_DMG)) {
2773                 /* init IDLE_POLL_COUNT = 60 */
2774                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2775                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2776                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2777                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2778
2779                 /* init RLC PG Delay */
2780                 data = 0;
2781                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2782                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2783                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2784                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2785                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2786
2787                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2788                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2789                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2790                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2791
2792                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2793                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2794                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2795                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2796
2797                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2798                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2799
2800                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2801                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2802                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2803
2804                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2805         }
2806 }
2807
2808 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2809                                                 bool enable)
2810 {
2811         uint32_t data = 0;
2812         uint32_t default_data = 0;
2813
2814         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2815         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2816                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2817                              enable ? 1 : 0);
2818         if (default_data != data)
2819                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2820 }
2821
2822 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2823                                                 bool enable)
2824 {
2825         uint32_t data = 0;
2826         uint32_t default_data = 0;
2827
2828         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2829         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2830                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2831                              enable ? 1 : 0);
2832         if (default_data != data)
2833                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2834 }
2835
2836 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2837                                         bool enable)
2838 {
2839         uint32_t data = 0;
2840         uint32_t default_data = 0;
2841
2842         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2843         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2844                              CP_PG_DISABLE,
2845                              enable ? 0 : 1);
2846         if (default_data != data)
2847                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2848 }
2849
2850 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2851                                                 bool enable)
2852 {
2853         uint32_t data, default_data;
2854
2855         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2856         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2857                              GFX_POWER_GATING_ENABLE,
2858                              enable ? 1 : 0);
2859         if (default_data != data)
2860                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2861 }
2862
2863 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2864                                                 bool enable)
2865 {
2866         uint32_t data, default_data;
2867
2868         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2869         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2870                              GFX_PIPELINE_PG_ENABLE,
2871                              enable ? 1 : 0);
2872         if (default_data != data)
2873                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2874
2875         if (!enable)
2876                 /* read any GFX register to wake up GFX */
2877                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2878 }
2879
2880 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2881                                                        bool enable)
2882 {
2883         uint32_t data, default_data;
2884
2885         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2886         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2887                              STATIC_PER_CU_PG_ENABLE,
2888                              enable ? 1 : 0);
2889         if (default_data != data)
2890                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2891 }
2892
2893 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2894                                                 bool enable)
2895 {
2896         uint32_t data, default_data;
2897
2898         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2899         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2900                              DYN_PER_CU_PG_ENABLE,
2901                              enable ? 1 : 0);
2902         if (default_data != data)
2903                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2904 }
2905
2906 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2907 {
2908         gfx_v9_0_init_csb(adev);
2909
2910         /*
2911          * The RLC save/restore list is supported since RLC v2_1
2912          * and is required by the gfxoff feature.
2913          */
2914         if (adev->gfx.rlc.is_rlc_v2_1) {
2915                 gfx_v9_1_init_rlc_save_restore_list(adev);
2916                 gfx_v9_0_enable_save_restore_machine(adev);
2917         }
2918
2919         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2920                               AMD_PG_SUPPORT_GFX_SMG |
2921                               AMD_PG_SUPPORT_GFX_DMG |
2922                               AMD_PG_SUPPORT_CP |
2923                               AMD_PG_SUPPORT_GDS |
2924                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2925                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2926                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2927                 gfx_v9_0_init_gfx_power_gating(adev);
2928         }
2929 }
2930
2931 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2932 {
2933         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2934         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2935         gfx_v9_0_wait_for_rlc_serdes(adev);
2936 }
2937
2938 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2939 {
2940         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2941         udelay(50);
2942         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2943         udelay(50);
2944 }
2945
2946 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2947 {
2948 #ifdef AMDGPU_RLC_DEBUG_RETRY
2949         u32 rlc_ucode_ver;
2950 #endif
2951
2952         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2953         udelay(50);
2954
2955         /* APUs enable the CP interrupt only after the CP has been initialized */
2956         if (!(adev->flags & AMD_IS_APU)) {
2957                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2958                 udelay(50);
2959         }
2960
2961 #ifdef AMDGPU_RLC_DEBUG_RETRY
2962         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2963         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2964         if (rlc_ucode_ver == 0x108) {
2965                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2966                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2967                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2968                  * default is 0x9C4 to create a 100us interval */
2969                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2970                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2971                  * to disable the page fault retry interrupts, default is
2972                  * 0x100 (256) */
2973                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2974         }
2975 #endif
2976 }
2977
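/*
 * Legacy (non-PSP) RLC microcode load: stream the ucode image into
 * RLC_GPM_UCODE_DATA one dword at a time, then write the firmware version
 * to RLC_GPM_UCODE_ADDR.
 */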
2978 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2979 {
2980         const struct rlc_firmware_header_v2_0 *hdr;
2981         const __le32 *fw_data;
2982         unsigned i, fw_size;
2983
2984         if (!adev->gfx.rlc_fw)
2985                 return -EINVAL;
2986
2987         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2988         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2989
2990         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2991                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2992         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2993
2994         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2995                         RLCG_UCODE_LOADING_START_ADDRESS);
2996         for (i = 0; i < fw_size; i++)
2997                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2998         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2999
3000         return 0;
3001 }
3002
3003 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3004 {
3005         int r;
3006
3007         if (amdgpu_sriov_vf(adev)) {
3008                 gfx_v9_0_init_csb(adev);
3009                 return 0;
3010         }
3011
3012         adev->gfx.rlc.funcs->stop(adev);
3013
3014         /* disable CG */
3015         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3016
3017         gfx_v9_0_init_pg(adev);
3018
3019         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3020                 /* legacy rlc firmware loading */
3021                 r = gfx_v9_0_rlc_load_microcode(adev);
3022                 if (r)
3023                         return r;
3024         }
3025
3026         switch (adev->asic_type) {
3027         case CHIP_RAVEN:
3028                 if (amdgpu_lbpw == 0)
3029                         gfx_v9_0_enable_lbpw(adev, false);
3030                 else
3031                         gfx_v9_0_enable_lbpw(adev, true);
3032                 break;
3033         case CHIP_VEGA20:
3034                 if (amdgpu_lbpw > 0)
3035                         gfx_v9_0_enable_lbpw(adev, true);
3036                 else
3037                         gfx_v9_0_enable_lbpw(adev, false);
3038                 break;
3039         default:
3040                 break;
3041         }
3042
3043         adev->gfx.rlc.funcs->start(adev);
3044
3045         return 0;
3046 }
3047
3048 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3049 {
3050         int i;
3051         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3052
3053         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3054         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3055         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3056         if (!enable) {
3057                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3058                         adev->gfx.gfx_ring[i].sched.ready = false;
3059         }
3060         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3061         udelay(50);
3062 }
3063
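/*
 * Legacy (non-PSP) CP gfx microcode load: halt the gfx CP, then write the
 * PFP, CE and ME images through their respective UCODE_ADDR/DATA register
 * pairs.
 */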
3064 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3065 {
3066         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3067         const struct gfx_firmware_header_v1_0 *ce_hdr;
3068         const struct gfx_firmware_header_v1_0 *me_hdr;
3069         const __le32 *fw_data;
3070         unsigned i, fw_size;
3071
3072         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3073                 return -EINVAL;
3074
3075         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3076                 adev->gfx.pfp_fw->data;
3077         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3078                 adev->gfx.ce_fw->data;
3079         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3080                 adev->gfx.me_fw->data;
3081
3082         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3083         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3084         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3085
3086         gfx_v9_0_cp_gfx_enable(adev, false);
3087
3088         /* PFP */
3089         fw_data = (const __le32 *)
3090                 (adev->gfx.pfp_fw->data +
3091                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3092         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3093         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3094         for (i = 0; i < fw_size; i++)
3095                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3096         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3097
3098         /* CE */
3099         fw_data = (const __le32 *)
3100                 (adev->gfx.ce_fw->data +
3101                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3102         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3103         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3104         for (i = 0; i < fw_size; i++)
3105                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3106         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3107
3108         /* ME */
3109         fw_data = (const __le32 *)
3110                 (adev->gfx.me_fw->data +
3111                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3112         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3113         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3114         for (i = 0; i < fw_size; i++)
3115                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3116         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3117
3118         return 0;
3119 }
3120
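/*
 * Emit the one-time init sequence on the gfx ring: clear-state preamble,
 * CONTEXT_CONTROL, the golden context registers from gfx9_cs_data, a
 * CLEAR_STATE packet and the CE partition bases.
 */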
3121 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3122 {
3123         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3124         const struct cs_section_def *sect = NULL;
3125         const struct cs_extent_def *ext = NULL;
3126         int r, i, tmp;
3127
3128         /* init the CP */
3129         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3130         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3131
3132         gfx_v9_0_cp_gfx_enable(adev, true);
3133
3134         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3135         if (r) {
3136                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3137                 return r;
3138         }
3139
3140         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3141         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3142
3143         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3144         amdgpu_ring_write(ring, 0x80000000);
3145         amdgpu_ring_write(ring, 0x80000000);
3146
3147         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3148                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3149                         if (sect->id == SECT_CONTEXT) {
3150                                 amdgpu_ring_write(ring,
3151                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3152                                                ext->reg_count));
3153                                 amdgpu_ring_write(ring,
3154                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3155                                 for (i = 0; i < ext->reg_count; i++)
3156                                         amdgpu_ring_write(ring, ext->extent[i]);
3157                         }
3158                 }
3159         }
3160
3161         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3162         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3163
3164         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3165         amdgpu_ring_write(ring, 0);
3166
3167         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3168         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3169         amdgpu_ring_write(ring, 0x8000);
3170         amdgpu_ring_write(ring, 0x8000);
3171
3172         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3173         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3174                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3175         amdgpu_ring_write(ring, tmp);
3176         amdgpu_ring_write(ring, 0);
3177
3178         amdgpu_ring_commit(ring);
3179
3180         return 0;
3181 }
3182
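/*
 * Program the gfx ring buffer: size, rptr/wptr write-back addresses, ring
 * base and doorbell range, then run the init sequence and mark the ring
 * ready for the scheduler.
 */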
3183 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3184 {
3185         struct amdgpu_ring *ring;
3186         u32 tmp;
3187         u32 rb_bufsz;
3188         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3189
3190         /* Set the write pointer delay */
3191         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3192
3193         /* set the RB to use vmid 0 */
3194         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3195
3196         /* Set ring buffer size */
3197         ring = &adev->gfx.gfx_ring[0];
3198         rb_bufsz = order_base_2(ring->ring_size / 8);
3199         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3200         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3201 #ifdef __BIG_ENDIAN
3202         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3203 #endif
3204         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3205
3206         /* Initialize the ring buffer's write pointers */
3207         ring->wptr = 0;
3208         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3209         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3210
3211         /* set the wb address whether it's enabled or not */
3212         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3213         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3214         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3215
3216         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3217         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3218         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3219
3220         mdelay(1);
3221         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3222
3223         rb_addr = ring->gpu_addr >> 8;
3224         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3225         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3226
3227         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3228         if (ring->use_doorbell) {
3229                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3230                                     DOORBELL_OFFSET, ring->doorbell_index);
3231                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3232                                     DOORBELL_EN, 1);
3233         } else {
3234                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3235         }
3236         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3237
3238         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3239                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3240         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3241
3242         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3243                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3244
3245
3246         /* start the ring */
3247         gfx_v9_0_cp_gfx_start(adev);
3248         ring->sched.ready = true;
3249
3250         return 0;
3251 }
3252
3253 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3254 {
3255         int i;
3256
3257         if (enable) {
3258                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3259         } else {
3260                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3261                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3262                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3263                         adev->gfx.compute_ring[i].sched.ready = false;
3264                 adev->gfx.kiq.ring.sched.ready = false;
3265         }
3266         udelay(50);
3267 }
3268
3269 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3270 {
3271         const struct gfx_firmware_header_v1_0 *mec_hdr;
3272         const __le32 *fw_data;
3273         unsigned i;
3274         u32 tmp;
3275
3276         if (!adev->gfx.mec_fw)
3277                 return -EINVAL;
3278
3279         gfx_v9_0_cp_compute_enable(adev, false);
3280
3281         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3282         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3283
3284         fw_data = (const __le32 *)
3285                 (adev->gfx.mec_fw->data +
3286                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3287         tmp = 0;
3288         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3289         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3290         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3291
3292         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3293                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3294         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3295                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3296
3297         /* MEC1 */
3298         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3299                          mec_hdr->jt_offset);
3300         for (i = 0; i < mec_hdr->jt_size; i++)
3301                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3302                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3303
3304         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3305                         adev->gfx.mec_fw_version);
3306         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3307
3308         return 0;
3309 }
3310
3311 /* KIQ functions */
3312 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3313 {
3314         uint32_t tmp;
3315         struct amdgpu_device *adev = ring->adev;
3316
3317         /* tell RLC which is KIQ queue */
3318         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3319         tmp &= 0xffffff00;
3320         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3321         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3322         tmp |= 0x80;
3323         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3324 }
3325
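/*
 * Map all user compute queues through the KIQ: build the queue mask from the
 * MEC queue bitmap, send one SET_RESOURCES packet followed by a MAP_QUEUES
 * packet per compute ring, then verify the KIQ ring with a ring test.
 */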
3326 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3327 {
3328         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3329         uint64_t queue_mask = 0;
3330         int r, i;
3331
3332         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3333                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3334                         continue;
3335
3336                 /* This situation may be hit in the future if a new HW
3337                  * generation exposes more than 64 queues. If so, the
3338                  * definition of queue_mask needs updating */
3339                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3340                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3341                         break;
3342                 }
3343
3344                 queue_mask |= (1ull << i);
3345         }
3346
3347         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3348         if (r) {
3349                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3350                 return r;
3351         }
3352
3353         /* set resources */
3354         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3355         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3356                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
3357         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
3358         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
3359         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
3360         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
3361         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
3362         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
3363         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3364                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3365                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3366                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3367
3368                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3369                 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
3370                 amdgpu_ring_write(kiq_ring,
3371                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3372                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3373                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3374                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3375                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3376                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3377                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3378                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3379                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3380                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3381                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3382                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3383                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3384                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3385         }
3386
3387         r = amdgpu_ring_test_helper(kiq_ring);
3388         if (r)
3389                 DRM_ERROR("KCQ enable failed\n");
3390
3391         return r;
3392 }
3393
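/*
 * Fill the v9 MQD for this ring: EOP buffer address/size, doorbell control,
 * PQ base/size, rptr report and wptr poll addresses, and mark the queue
 * active so the CP can program the HQD from this structure.
 */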
3394 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3395 {
3396         struct amdgpu_device *adev = ring->adev;
3397         struct v9_mqd *mqd = ring->mqd_ptr;
3398         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3399         uint32_t tmp;
3400
3401         mqd->header = 0xC0310800;
3402         mqd->compute_pipelinestat_enable = 0x00000001;
3403         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3404         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3405         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3406         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3407         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3408         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3409         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3410         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3411         mqd->compute_misc_reserved = 0x00000003;
3412
3413         mqd->dynamic_cu_mask_addr_lo =
3414                 lower_32_bits(ring->mqd_gpu_addr
3415                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3416         mqd->dynamic_cu_mask_addr_hi =
3417                 upper_32_bits(ring->mqd_gpu_addr
3418                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3419
3420         eop_base_addr = ring->eop_gpu_addr >> 8;
3421         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3422         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3423
3424         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3425         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3426         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3427                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3428
3429         mqd->cp_hqd_eop_control = tmp;
3430
3431         /* enable doorbell? */
3432         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3433
3434         if (ring->use_doorbell) {
3435                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3436                                     DOORBELL_OFFSET, ring->doorbell_index);
3437                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3438                                     DOORBELL_EN, 1);
3439                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3440                                     DOORBELL_SOURCE, 0);
3441                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3442                                     DOORBELL_HIT, 0);
3443         } else {
3444                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3445                                          DOORBELL_EN, 0);
3446         }
3447
3448         mqd->cp_hqd_pq_doorbell_control = tmp;
3449
3450         /* disable the queue if it's active */
3451         ring->wptr = 0;
3452         mqd->cp_hqd_dequeue_request = 0;
3453         mqd->cp_hqd_pq_rptr = 0;
3454         mqd->cp_hqd_pq_wptr_lo = 0;
3455         mqd->cp_hqd_pq_wptr_hi = 0;
3456
3457         /* set the pointer to the MQD */
3458         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3459         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3460
3461         /* set MQD vmid to 0 */
3462         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3463         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3464         mqd->cp_mqd_control = tmp;
3465
3466         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3467         hqd_gpu_addr = ring->gpu_addr >> 8;
3468         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3469         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3470
3471         /* set up the HQD, this is similar to CP_RB0_CNTL */
3472         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3473         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3474                             (order_base_2(ring->ring_size / 4) - 1));
3475         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3476                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3477 #ifdef __BIG_ENDIAN
3478         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3479 #endif
3480         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3481         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3482         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3483         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3484         mqd->cp_hqd_pq_control = tmp;
3485
3486         /* set the wb address whether it's enabled or not */
3487         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3488         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3489         mqd->cp_hqd_pq_rptr_report_addr_hi =
3490                 upper_32_bits(wb_gpu_addr) & 0xffff;
3491
3492         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3493         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3494         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3495         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3496
3497         tmp = 0;
3498         /* enable the doorbell if requested */
3499         if (ring->use_doorbell) {
3500                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3501                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3502                                 DOORBELL_OFFSET, ring->doorbell_index);
3503
3504                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3505                                          DOORBELL_EN, 1);
3506                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3507                                          DOORBELL_SOURCE, 0);
3508                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3509                                          DOORBELL_HIT, 0);
3510         }
3511
3512         mqd->cp_hqd_pq_doorbell_control = tmp;
3513
3514         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3515         ring->wptr = 0;
3516         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3517
3518         /* set the vmid for the queue */
3519         mqd->cp_hqd_vmid = 0;
3520
3521         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3522         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3523         mqd->cp_hqd_persistent_state = tmp;
3524
3525         /* set MIN_IB_AVAIL_SIZE */
3526         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3527         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3528         mqd->cp_hqd_ib_control = tmp;
3529
3530         /* activate the queue */
3531         mqd->cp_hqd_active = 1;
3532
3533         return 0;
3534 }
3535
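/*
 * Program the CP HQD registers for the KIQ ring from its MQD.  Callers
 * select the queue with soc15_grbm_select() while holding adev->srbm_mutex.
 */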
3536 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3537 {
3538         struct amdgpu_device *adev = ring->adev;
3539         struct v9_mqd *mqd = ring->mqd_ptr;
3540         int j;
3541
3542         /* disable wptr polling */
3543         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3544
3545         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3546                mqd->cp_hqd_eop_base_addr_lo);
3547         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3548                mqd->cp_hqd_eop_base_addr_hi);
3549
3550         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3551         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3552                mqd->cp_hqd_eop_control);
3553
3554         /* enable doorbell? */
3555         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3556                mqd->cp_hqd_pq_doorbell_control);
3557
3558         /* disable the queue if it's active */
3559         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3560                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3561                 for (j = 0; j < adev->usec_timeout; j++) {
3562                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3563                                 break;
3564                         udelay(1);
3565                 }
3566                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3567                        mqd->cp_hqd_dequeue_request);
3568                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3569                        mqd->cp_hqd_pq_rptr);
3570                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3571                        mqd->cp_hqd_pq_wptr_lo);
3572                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3573                        mqd->cp_hqd_pq_wptr_hi);
3574         }
3575
3576         /* set the pointer to the MQD */
3577         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3578                mqd->cp_mqd_base_addr_lo);
3579         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3580                mqd->cp_mqd_base_addr_hi);
3581
3582         /* set MQD vmid to 0 */
3583         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3584                mqd->cp_mqd_control);
3585
3586         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3587         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3588                mqd->cp_hqd_pq_base_lo);
3589         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3590                mqd->cp_hqd_pq_base_hi);
3591
3592         /* set up the HQD, this is similar to CP_RB0_CNTL */
3593         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3594                mqd->cp_hqd_pq_control);
3595
3596         /* set the wb address whether it's enabled or not */
3597         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3598                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3599         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3600                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3601
3602         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3603         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3604                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3605         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3606                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3607
3608         /* enable the doorbell if requested */
3609         if (ring->use_doorbell) {
3610                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3611                                         (adev->doorbell_index.kiq * 2) << 2);
3612                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3613                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3614         }
3615
3616         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3617                mqd->cp_hqd_pq_doorbell_control);
3618
3619         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3620         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3621                mqd->cp_hqd_pq_wptr_lo);
3622         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3623                mqd->cp_hqd_pq_wptr_hi);
3624
3625         /* set the vmid for the queue */
3626         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3627
3628         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3629                mqd->cp_hqd_persistent_state);
3630
3631         /* activate the queue */
3632         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3633                mqd->cp_hqd_active);
3634
3635         if (ring->use_doorbell)
3636                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3637
3638         return 0;
3639 }
3640
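/*
 * Tear down the KIQ hardware queue: request a dequeue, wait for
 * CP_HQD_ACTIVE to clear (forcing it off on timeout) and reset the
 * doorbell and pointer registers.
 */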
3641 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3642 {
3643         struct amdgpu_device *adev = ring->adev;
3644         int j;
3645
3646         /* disable the queue if it's active */
3647         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3648
3649                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3650
3651                 for (j = 0; j < adev->usec_timeout; j++) {
3652                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3653                                 break;
3654                         udelay(1);
3655                 }
3656
3657                 if (j == adev->usec_timeout) {
3658                         DRM_DEBUG("KIQ dequeue request failed.\n");
3659
3660                         /* Manual disable if dequeue request times out */
3661                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3662                 }
3663
3664                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3665                       0);
3666         }
3667
3668         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3669         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3670         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3671         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3672         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3673         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3674         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3675         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3676
3677         return 0;
3678 }
3679
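/*
 * Initialize the KIQ queue.  On GPU reset the MQD is restored from the
 * backup copy; otherwise a fresh MQD is built and backed up for later
 * resets.  The HQD registers are programmed directly in both cases.
 */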
3680 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3681 {
3682         struct amdgpu_device *adev = ring->adev;
3683         struct v9_mqd *mqd = ring->mqd_ptr;
3684         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3685
3686         gfx_v9_0_kiq_setting(ring);
3687
3688         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3689                 /* reset MQD to a clean status */
3690                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3691                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3692
3693                 /* reset ring buffer */
3694                 ring->wptr = 0;
3695                 amdgpu_ring_clear_ring(ring);
3696
3697                 mutex_lock(&adev->srbm_mutex);
3698                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3699                 gfx_v9_0_kiq_init_register(ring);
3700                 soc15_grbm_select(adev, 0, 0, 0, 0);
3701                 mutex_unlock(&adev->srbm_mutex);
3702         } else {
3703                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3704                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3705                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3706                 mutex_lock(&adev->srbm_mutex);
3707                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3708                 gfx_v9_0_mqd_init(ring);
3709                 gfx_v9_0_kiq_init_register(ring);
3710                 soc15_grbm_select(adev, 0, 0, 0, 0);
3711                 mutex_unlock(&adev->srbm_mutex);
3712
3713                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3714                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3715         }
3716
3717         return 0;
3718 }
3719
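/*
 * Initialize a compute queue (KCQ) MQD.  Unlike the KIQ, the HQD registers
 * are not written here; the queues are mapped later through the KIQ.
 */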
3720 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3721 {
3722         struct amdgpu_device *adev = ring->adev;
3723         struct v9_mqd *mqd = ring->mqd_ptr;
3724         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3725
3726         if (!adev->in_gpu_reset && !adev->in_suspend) {
3727                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3728                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3729                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3730                 mutex_lock(&adev->srbm_mutex);
3731                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3732                 gfx_v9_0_mqd_init(ring);
3733                 soc15_grbm_select(adev, 0, 0, 0, 0);
3734                 mutex_unlock(&adev->srbm_mutex);
3735
3736                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3737                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3738         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3739                 /* reset MQD to a clean status */
3740                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3741                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3742
3743                 /* reset ring buffer */
3744                 ring->wptr = 0;
3745                 amdgpu_ring_clear_ring(ring);
3746         } else {
3747                 amdgpu_ring_clear_ring(ring);
3748         }
3749
3750         return 0;
3751 }
3752
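/* Map the KIQ MQD BO, initialize the queue and mark the ring ready. */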
3753 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3754 {
3755         struct amdgpu_ring *ring;
3756         int r;
3757
3758         ring = &adev->gfx.kiq.ring;
3759
3760         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3761         if (unlikely(r != 0))
3762                 return r;
3763
3764         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3765         if (unlikely(r != 0))
3766                 return r;
3767
3768         gfx_v9_0_kiq_init_queue(ring);
3769         amdgpu_bo_kunmap(ring->mqd_obj);
3770         ring->mqd_ptr = NULL;
3771         amdgpu_bo_unreserve(ring->mqd_obj);
3772         ring->sched.ready = true;
3773         return 0;
3774 }
3775
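/*
 * Enable the compute engines, initialize the MQD of every compute ring and
 * map the queues on the hardware through the KIQ.
 */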
3776 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3777 {
3778         struct amdgpu_ring *ring = NULL;
3779         int r = 0, i;
3780
3781         gfx_v9_0_cp_compute_enable(adev, true);
3782
3783         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3784                 ring = &adev->gfx.compute_ring[i];
3785
3786                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3787                 if (unlikely(r != 0))
3788                         goto done;
3789                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3790                 if (!r) {
3791                         r = gfx_v9_0_kcq_init_queue(ring);
3792                         amdgpu_bo_kunmap(ring->mqd_obj);
3793                         ring->mqd_ptr = NULL;
3794                 }
3795                 amdgpu_bo_unreserve(ring->mqd_obj);
3796                 if (r)
3797                         goto done;
3798         }
3799
3800         r = gfx_v9_0_kiq_kcq_enable(adev);
3801 done:
3802         return r;
3803 }
3804
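/*
 * Bring up the command processor: load microcode when the PSP does not,
 * then resume the KIQ, GFX and compute rings and run their ring tests.
 */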
3805 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3806 {
3807         int r, i;
3808         struct amdgpu_ring *ring;
3809
3810         if (!(adev->flags & AMD_IS_APU))
3811                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3812
3813         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3814                 if (adev->asic_type != CHIP_ARCTURUS) {
3815                         /* legacy firmware loading */
3816                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3817                         if (r)
3818                                 return r;
3819                 }
3820
3821                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3822                 if (r)
3823                         return r;
3824         }
3825
3826         r = gfx_v9_0_kiq_resume(adev);
3827         if (r)
3828                 return r;
3829
3830         if (adev->asic_type != CHIP_ARCTURUS) {
3831                 r = gfx_v9_0_cp_gfx_resume(adev);
3832                 if (r)
3833                         return r;
3834         }
3835
3836         r = gfx_v9_0_kcq_resume(adev);
3837         if (r)
3838                 return r;
3839
3840         if (adev->asic_type != CHIP_ARCTURUS) {
3841                 ring = &adev->gfx.gfx_ring[0];
3842                 r = amdgpu_ring_test_helper(ring);
3843                 if (r)
3844                         return r;
3845         }
3846
3847         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3848                 ring = &adev->gfx.compute_ring[i];
3849                 amdgpu_ring_test_helper(ring);
3850         }
3851
3852         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3853
3854         return 0;
3855 }
3856
3857 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3858 {
3859         if (adev->asic_type != CHIP_ARCTURUS)
3860                 gfx_v9_0_cp_gfx_enable(adev, enable);
3861         gfx_v9_0_cp_compute_enable(adev, enable);
3862 }
3863
3864 static int gfx_v9_0_hw_init(void *handle)
3865 {
3866         int r;
3867         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3868
3869         if (!amdgpu_sriov_vf(adev))
3870                 gfx_v9_0_init_golden_registers(adev);
3871
3872         gfx_v9_0_constants_init(adev);
3873
3874         r = gfx_v9_0_csb_vram_pin(adev);
3875         if (r)
3876                 return r;
3877
3878         r = adev->gfx.rlc.funcs->resume(adev);
3879         if (r)
3880                 return r;
3881
3882         r = gfx_v9_0_cp_resume(adev);
3883         if (r)
3884                 return r;
3885
3886         if (adev->asic_type != CHIP_ARCTURUS) {
3887                 r = gfx_v9_0_ngg_en(adev);
3888                 if (r)
3889                         return r;
3890         }
3891
3892         return r;
3893 }
3894
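/* Unmap all compute queues by sending UNMAP_QUEUES packets through the KIQ. */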
3895 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3896 {
3897         int r, i;
3898         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3899
3900         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3901         if (r)
3902                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3903
3904         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3905                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3906
3907                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3908                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3909                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3910                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3911                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3912                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3913                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3914                 amdgpu_ring_write(kiq_ring, 0);
3915                 amdgpu_ring_write(kiq_ring, 0);
3916                 amdgpu_ring_write(kiq_ring, 0);
3917         }
3918         r = amdgpu_ring_test_helper(kiq_ring);
3919         if (r)
3920                 DRM_ERROR("KCQ disable failed\n");
3921
3922         return r;
3923 }
3924
3925 static int gfx_v9_0_hw_fini(void *handle)
3926 {
3927         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3928
3929         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3930         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3931         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3932
3933         /* disable KCQ so the CPC does not touch memory that is no longer valid */
3934         gfx_v9_0_kcq_disable(adev);
3935
3936         if (amdgpu_sriov_vf(adev)) {
3937                 gfx_v9_0_cp_gfx_enable(adev, false);
3938                 /* Polling must be disabled for SRIOV once the hardware is
3939                  * finished; otherwise the CPC engine may keep fetching the WB
3940                  * address, which becomes invalid after the software side is
3941                  * done, and trigger a DMAR read error on the hypervisor side.
3942                  */
3943                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3944                 return 0;
3945         }
3946
3947         /* Use the deinitialization sequence from CAIL when unbinding the device
3948          * from the driver, otherwise the KIQ hangs when the device is bound back.
3949          */
3950         if (!adev->in_gpu_reset && !adev->in_suspend) {
3951                 mutex_lock(&adev->srbm_mutex);
3952                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3953                                 adev->gfx.kiq.ring.pipe,
3954                                 adev->gfx.kiq.ring.queue, 0);
3955                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3956                 soc15_grbm_select(adev, 0, 0, 0, 0);
3957                 mutex_unlock(&adev->srbm_mutex);
3958         }
3959
3960         gfx_v9_0_cp_enable(adev, false);
3961         adev->gfx.rlc.funcs->stop(adev);
3962
3963         gfx_v9_0_csb_vram_unpin(adev);
3964
3965         return 0;
3966 }
3967
3968 static int gfx_v9_0_suspend(void *handle)
3969 {
3970         return gfx_v9_0_hw_fini(handle);
3971 }
3972
3973 static int gfx_v9_0_resume(void *handle)
3974 {
3975         return gfx_v9_0_hw_init(handle);
3976 }
3977
3978 static bool gfx_v9_0_is_idle(void *handle)
3979 {
3980         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3981
3982         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3983                                 GRBM_STATUS, GUI_ACTIVE))
3984                 return false;
3985         else
3986                 return true;
3987 }
3988
3989 static int gfx_v9_0_wait_for_idle(void *handle)
3990 {
3991         unsigned i;
3992         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3993
3994         for (i = 0; i < adev->usec_timeout; i++) {
3995                 if (gfx_v9_0_is_idle(handle))
3996                         return 0;
3997                 udelay(1);
3998         }
3999         return -ETIMEDOUT;
4000 }
4001
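/*
 * Soft reset the CP/GFX/RLC blocks through GRBM_SOFT_RESET when the GRBM
 * status registers report busy units.
 */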
4002 static int gfx_v9_0_soft_reset(void *handle)
4003 {
4004         u32 grbm_soft_reset = 0;
4005         u32 tmp;
4006         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4007
4008         /* GRBM_STATUS */
4009         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4010         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4011                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4012                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4013                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4014                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4015                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4016                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4017                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4018                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4019                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4020         }
4021
4022         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4023                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4024                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4025         }
4026
4027         /* GRBM_STATUS2 */
4028         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4029         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4030                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4031                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4032
4033
4034         if (grbm_soft_reset) {
4035                 /* stop the rlc */
4036                 adev->gfx.rlc.funcs->stop(adev);
4037
4038                 if (adev->asic_type != CHIP_ARCTURUS)
4039                         /* Disable GFX parsing/prefetching */
4040                         gfx_v9_0_cp_gfx_enable(adev, false);
4041
4042                 /* Disable MEC parsing/prefetching */
4043                 gfx_v9_0_cp_compute_enable(adev, false);
4044
4045                 if (grbm_soft_reset) {
4046                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4047                         tmp |= grbm_soft_reset;
4048                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4049                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4050                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4051
4052                         udelay(50);
4053
4054                         tmp &= ~grbm_soft_reset;
4055                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4056                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4057                 }
4058
4059                 /* Wait a little for things to settle down */
4060                 udelay(50);
4061         }
4062         return 0;
4063 }
4064
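/* Latch and read back the 64-bit RLC GPU clock counter. */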
4065 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4066 {
4067         uint64_t clock;
4068
4069         mutex_lock(&adev->gfx.gpu_clock_mutex);
4070         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4071         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4072                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4073         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4074         return clock;
4075 }
4076
4077 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4078                                           uint32_t vmid,
4079                                           uint32_t gds_base, uint32_t gds_size,
4080                                           uint32_t gws_base, uint32_t gws_size,
4081                                           uint32_t oa_base, uint32_t oa_size)
4082 {
4083         struct amdgpu_device *adev = ring->adev;
4084
4085         /* GDS Base */
4086         gfx_v9_0_write_data_to_reg(ring, 0, false,
4087                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4088                                    gds_base);
4089
4090         /* GDS Size */
4091         gfx_v9_0_write_data_to_reg(ring, 0, false,
4092                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4093                                    gds_size);
4094
4095         /* GWS */
4096         gfx_v9_0_write_data_to_reg(ring, 0, false,
4097                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4098                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4099
4100         /* OA */
4101         gfx_v9_0_write_data_to_reg(ring, 0, false,
4102                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4103                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4104 }
4105
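/*
 * Hand-assembled GFX9 compute shaders used by the EDC GPR workaround below
 * to initialize the VGPR and SGPR register files.
 */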
4106 static const u32 vgpr_init_compute_shader[] =
4107 {
4108         0xb07c0000, 0xbe8000ff,
4109         0x000000f8, 0xbf110800,
4110         0x7e000280, 0x7e020280,
4111         0x7e040280, 0x7e060280,
4112         0x7e080280, 0x7e0a0280,
4113         0x7e0c0280, 0x7e0e0280,
4114         0x80808800, 0xbe803200,
4115         0xbf84fff5, 0xbf9c0000,
4116         0xd28c0001, 0x0001007f,
4117         0xd28d0001, 0x0002027e,
4118         0x10020288, 0xb8810904,
4119         0xb7814000, 0xd1196a01,
4120         0x00000301, 0xbe800087,
4121         0xbefc00c1, 0xd89c4000,
4122         0x00020201, 0xd89cc080,
4123         0x00040401, 0x320202ff,
4124         0x00000800, 0x80808100,
4125         0xbf84fff8, 0x7e020280,
4126         0xbf810000, 0x00000000,
4127 };
4128
4129 static const u32 sgpr_init_compute_shader[] =
4130 {
4131         0xb07c0000, 0xbe8000ff,
4132         0x0000005f, 0xbee50080,
4133         0xbe812c65, 0xbe822c65,
4134         0xbe832c65, 0xbe842c65,
4135         0xbe852c65, 0xb77c0005,
4136         0x80808500, 0xbf84fff8,
4137         0xbe800080, 0xbf810000,
4138 };
4139
4140 static const struct soc15_reg_entry vgpr_init_regs[] = {
4141    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4142    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4143    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4144    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4145    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4146    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4147    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4148    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4149    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
4150    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4151 };
4152
4153 static const struct soc15_reg_entry sgpr_init_regs[] = {
4154    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4155    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4156    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4157    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4158    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4159    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4160    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4161    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4162    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 SGPRs) */
4163    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4164 };
4165
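/*
 * GFX SEC/DED (EDC) counter registers together with the SE and instance
 * counts used when reading them back to clear the counters.
 */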
4166 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4167    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4168    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4169    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4170    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4171    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4172    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4173    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4174    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4175    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4176    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4177    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4178    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4179    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4180    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4181    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4182    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4183    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4184    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4185    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4186    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4187    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4188    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4189    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4190    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4191    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4192    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4193    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4194    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4195    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4196    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4197    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4198    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4199 };
4200
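/*
 * EDC workaround: clear the whole GDS with a DMA_DATA packet so that its
 * error detection/correction state starts out initialized.
 */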
4201 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4202 {
4203         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4204         int i, r;
4205
4206         r = amdgpu_ring_alloc(ring, 7);
4207         if (r) {
4208                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4209                         ring->name, r);
4210                 return r;
4211         }
4212
4213         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4214         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4215
4216         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4217         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4218                                 PACKET3_DMA_DATA_DST_SEL(1) |
4219                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4220                                 PACKET3_DMA_DATA_ENGINE(0)));
4221         amdgpu_ring_write(ring, 0);
4222         amdgpu_ring_write(ring, 0);
4223         amdgpu_ring_write(ring, 0);
4224         amdgpu_ring_write(ring, 0);
4225         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4226                                 adev->gds.gds_size);
4227
4228         amdgpu_ring_commit(ring);
4229
4230         for (i = 0; i < adev->usec_timeout; i++) {
4231                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4232                         break;
4233                 udelay(1);
4234         }
4235
4236         if (i >= adev->usec_timeout)
4237                 r = -ETIMEDOUT;
4238
4239         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4240
4241         return r;
4242 }
4243
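/*
 * EDC workaround: dispatch the VGPR and SGPR init shaders from an IB and
 * then read the EDC counter registers back to clear them.  Only runs when
 * GFX RAS is supported and the compute ring is ready.
 */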
4244 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4245 {
4246         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4247         struct amdgpu_ib ib;
4248         struct dma_fence *f = NULL;
4249         int r, i, j, k;
4250         unsigned total_size, vgpr_offset, sgpr_offset;
4251         u64 gpu_addr;
4252
4253         /* only support when RAS is enabled */
4254         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4255                 return 0;
4256
4257         /* bail if the compute ring is not ready */
4258         if (!ring->sched.ready)
4259                 return 0;
4260
4261         total_size =
4262                 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4263         total_size +=
4264                 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4265         total_size = ALIGN(total_size, 256);
4266         vgpr_offset = total_size;
4267         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4268         sgpr_offset = total_size;
4269         total_size += sizeof(sgpr_init_compute_shader);
4270
4271         /* allocate an indirect buffer to put the commands in */
4272         memset(&ib, 0, sizeof(ib));
4273         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4274         if (r) {
4275                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4276                 return r;
4277         }
4278
4279         /* load the compute shaders */
4280         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4281                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4282
4283         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4284                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4285
4286         /* init the ib length to 0 */
4287         ib.length_dw = 0;
4288
4289         /* VGPR */
4290         /* write the register state for the compute dispatch */
4291         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4292                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4293                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4294                                                                 - PACKET3_SET_SH_REG_START;
4295                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4296         }
4297         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4298         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4299         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4300         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4301                                                         - PACKET3_SET_SH_REG_START;
4302         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4303         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4304
4305         /* write dispatch packet */
4306         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4307         ib.ptr[ib.length_dw++] = 128; /* x */
4308         ib.ptr[ib.length_dw++] = 1; /* y */
4309         ib.ptr[ib.length_dw++] = 1; /* z */
4310         ib.ptr[ib.length_dw++] =
4311                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4312
4313         /* write CS partial flush packet */
4314         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4315         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4316
4317         /* SGPR */
4318         /* write the register state for the compute dispatch */
4319         for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4320                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4321                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4322                                                                 - PACKET3_SET_SH_REG_START;
4323                 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4324         }
4325         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4326         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4327         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4328         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4329                                                         - PACKET3_SET_SH_REG_START;
4330         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4331         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4332
4333         /* write dispatch packet */
4334         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4335         ib.ptr[ib.length_dw++] = 128; /* x */
4336         ib.ptr[ib.length_dw++] = 1; /* y */
4337         ib.ptr[ib.length_dw++] = 1; /* z */
4338         ib.ptr[ib.length_dw++] =
4339                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4340
4341         /* write CS partial flush packet */
4342         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4343         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4344
4345         /* schedule the IB on the ring */
4346         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4347         if (r) {
4348                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4349                 goto fail;
4350         }
4351
4352         /* wait for the GPU to finish processing the IB */
4353         r = dma_fence_wait(f, false);
4354         if (r) {
4355                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4356                 goto fail;
4357         }
4358
4359         /* read back registers to clear the counters */
4360         mutex_lock(&adev->grbm_idx_mutex);
4361         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4362                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4363                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4364                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4365                                 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4366                         }
4367                 }
4368         }
4369         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4370         mutex_unlock(&adev->grbm_idx_mutex);
4371
4372 fail:
4373         amdgpu_ib_free(adev, &ib, NULL);
4374         dma_fence_put(f);
4375
4376         return r;
4377 }
4378
4379 static int gfx_v9_0_early_init(void *handle)
4380 {
4381         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4382
4383         if (adev->asic_type == CHIP_ARCTURUS)
4384                 adev->gfx.num_gfx_rings = 0;
4385         else
4386                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4387         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4388         gfx_v9_0_set_ring_funcs(adev);
4389         gfx_v9_0_set_irq_funcs(adev);
4390         gfx_v9_0_set_gds_init(adev);
4391         gfx_v9_0_set_rlc_funcs(adev);
4392
4393         return 0;
4394 }
4395
4396 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
4397                 struct ras_err_data *err_data,
4398                 struct amdgpu_iv_entry *entry);
4399
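/*
 * Late RAS/ECC init: run the EDC workarounds, enable the GFX RAS feature
 * and register the interrupt handler, debugfs and sysfs entries.
 */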
4400 static int gfx_v9_0_ecc_late_init(void *handle)
4401 {
4402         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4403         struct ras_common_if **ras_if = &adev->gfx.ras_if;
4404         struct ras_ih_if ih_info = {
4405                 .cb = gfx_v9_0_process_ras_data_cb,
4406         };
4407         struct ras_fs_if fs_info = {
4408                 .sysfs_name = "gfx_err_count",
4409                 .debugfs_name = "gfx_err_inject",
4410         };
4411         struct ras_common_if ras_block = {
4412                 .block = AMDGPU_RAS_BLOCK__GFX,
4413                 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
4414                 .sub_block_index = 0,
4415                 .name = "gfx",
4416         };
4417         int r;
4418
4419         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
4420                 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
4421                 return 0;
4422         }
4423
4424         r = gfx_v9_0_do_edc_gds_workarounds(adev);
4425         if (r)
4426                 return r;
4427
4428         /* requires IBs so do in late init after IB pool is initialized */
4429         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4430         if (r)
4431                 return r;
4432
4433         /* handle the resume path. */
4434         if (*ras_if) {
4435                 /* Resend the RAS TA enable command during resume and
4436                  * prepare to handle a failure.
4437                  */
4438                 ih_info.head = **ras_if;
4439                 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4440                 if (r) {
4441                         if (r == -EAGAIN) {
4442                                 /* request a gpu reset. will run again. */
4443                                 amdgpu_ras_request_reset_on_boot(adev,
4444                                                 AMDGPU_RAS_BLOCK__GFX);
4445                                 return 0;
4446                         }
4447                         /* failed to enable RAS, clean everything up. */
4448                         goto irq;
4449                 }
4450                 /* enabled successfully, continue. */
4451                 goto resume;
4452         }
4453
4454         *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
4455         if (!*ras_if)
4456                 return -ENOMEM;
4457
4458         **ras_if = ras_block;
4459
4460         r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4461         if (r) {
4462                 if (r == -EAGAIN) {
4463                         amdgpu_ras_request_reset_on_boot(adev,
4464                                         AMDGPU_RAS_BLOCK__GFX);
4465                         r = 0;
4466                 }
4467                 goto feature;
4468         }
4469
4470         ih_info.head = **ras_if;
4471         fs_info.head = **ras_if;
4472
4473         r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
4474         if (r)
4475                 goto interrupt;
4476
4477         amdgpu_ras_debugfs_create(adev, &fs_info);
4478
4479         r = amdgpu_ras_sysfs_create(adev, &fs_info);
4480         if (r)
4481                 goto sysfs;
4482 resume:
4483         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
4484         if (r)
4485                 goto irq;
4486
4487         return 0;
4488 irq:
4489         amdgpu_ras_sysfs_remove(adev, *ras_if);
4490 sysfs:
4491         amdgpu_ras_debugfs_remove(adev, *ras_if);
4492         amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
4493 interrupt:
4494         amdgpu_ras_feature_enable(adev, *ras_if, 0);
4495 feature:
4496         kfree(*ras_if);
4497         *ras_if = NULL;
4498         return r;
4499 }
4500
4501 static int gfx_v9_0_late_init(void *handle)
4502 {
4503         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4504         int r;
4505
4506         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4507         if (r)
4508                 return r;
4509
4510         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4511         if (r)
4512                 return r;
4513
4514         r = gfx_v9_0_ecc_late_init(handle);
4515         if (r)
4516                 return r;
4517
4518         return 0;
4519 }
4520
4521 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4522 {
4523         uint32_t rlc_setting;
4524
4525         /* if RLC is not enabled, do nothing */
4526         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4527         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4528                 return false;
4529
4530         return true;
4531 }
4532
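/* Request RLC safe mode and wait for the RLC to acknowledge the command. */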
4533 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4534 {
4535         uint32_t data;
4536         unsigned i;
4537
4538         data = RLC_SAFE_MODE__CMD_MASK;
4539         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4540         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4541
4542         /* wait for RLC_SAFE_MODE */
4543         for (i = 0; i < adev->usec_timeout; i++) {
4544                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4545                         break;
4546                 udelay(1);
4547         }
4548 }
4549
4550 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4551 {
4552         uint32_t data;
4553
4554         data = RLC_SAFE_MODE__CMD_MASK;
4555         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4556 }
4557
4558 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4559                                                 bool enable)
4560 {
4561         amdgpu_gfx_rlc_enter_safe_mode(adev);
4562
4563         if (is_support_sw_smu(adev) && !enable)
4564                 smu_set_gfx_cgpg(&adev->smu, enable);
4565
4566         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4567                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4568                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4569                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4570         } else {
4571                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4572                 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4573         }
4574
4575         amdgpu_gfx_rlc_exit_safe_mode(adev);
4576 }
4577
4578 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4579                                                 bool enable)
4580 {
4581         /* TODO: double check if we need to perform under safe mode */
4582         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4583
4584         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4585                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4586         else
4587                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4588
4589         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4590                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4591         else
4592                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4593
4594         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4595 }
4596
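/*
 * Enable or disable medium grain clock gating (MGCG) and memory light
 * sleep (MGLS) for the RLC and CP while in RLC safe mode.
 */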
4597 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4598                                                       bool enable)
4599 {
4600         uint32_t data, def;
4601
4602         amdgpu_gfx_rlc_enter_safe_mode(adev);
4603
4604         /* It is disabled by HW by default */
4605         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4606                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4607                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4608
4609                 if (adev->asic_type != CHIP_VEGA12)
4610                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4611
4612                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4613                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4614                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4615
4616                 /* only for Vega10 & Raven1 */
4617                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4618
4619                 if (def != data)
4620                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4621
4622                 /* MGLS is a global flag to control all MGLS in GFX */
4623                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4624                         /* 2 - RLC memory Light sleep */
4625                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4626                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4627                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4628                                 if (def != data)
4629                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4630                         }
4631                         /* 3 - CP memory Light sleep */
4632                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4633                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4634                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4635                                 if (def != data)
4636                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4637                         }
4638                 }
4639         } else {
4640                 /* 1 - MGCG_OVERRIDE */
4641                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4642
4643                 if (adev->asic_type != CHIP_VEGA12)
4644                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4645
4646                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4647                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4648                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4649                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4650
4651                 if (def != data)
4652                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4653
4654                 /* 2 - disable MGLS in RLC */
4655                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4656                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4657                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4658                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4659                 }
4660
4661                 /* 3 - disable MGLS in CP */
4662                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4663                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4664                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4665                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4666                 }
4667         }
4668
4669         amdgpu_gfx_rlc_exit_safe_mode(adev);
4670 }
4671
4672 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4673                                            bool enable)
4674 {
4675         uint32_t data, def;
4676
4677         if (adev->asic_type == CHIP_ARCTURUS)
4678                 return;
4679
4680         amdgpu_gfx_rlc_enter_safe_mode(adev);
4681
4682         /* Enable 3D CGCG/CGLS */
4683         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4684                 /* write cmd to clear cgcg/cgls ov */
4685                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4686                 /* unset CGCG override */
4687                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4688                 /* update CGCG and CGLS override bits */
4689                 if (def != data)
4690                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4691
4692                 /* enable 3Dcgcg FSM(0x0000363f) */
4693                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4694
4695                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4696                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4697                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4698                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4699                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4700                 if (def != data)
4701                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4702
4703                 /* set IDLE_POLL_COUNT(0x00900100) */
4704                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4705                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4706                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4707                 if (def != data)
4708                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4709         } else {
4710                 /* Disable CGCG/CGLS */
4711                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4712                 /* disable cgcg, cgls should be disabled */
4713                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4714                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4715                 /* disable cgcg and cgls in FSM */
4716                 if (def != data)
4717                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4718         }
4719
4720         amdgpu_gfx_rlc_exit_safe_mode(adev);
4721 }
4722
4723 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4724                                                       bool enable)
4725 {
4726         uint32_t def, data;
4727
4728         amdgpu_gfx_rlc_enter_safe_mode(adev);
4729
4730         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4731                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4732                 /* unset CGCG override */
4733                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4734                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4735                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4736                 else
4737                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4738                 /* update CGCG and CGLS override bits */
4739                 if (def != data)
4740                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4741
4742                 /* enable cgcg FSM(0x0000363F) */
4743                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4744
4745                 if (adev->asic_type == CHIP_ARCTURUS)
4746                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4747                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4748                 else
4749                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4750                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4751                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4752                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4753                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4754                 if (def != data)
4755                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4756
4757                 /* set IDLE_POLL_COUNT(0x00900100) */
4758                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4759                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4760                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4761                 if (def != data)
4762                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4763         } else {
4764                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4765                 /* reset CGCG/CGLS bits */
4766                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4767                 /* disable cgcg and cgls in FSM */
4768                 if (def != data)
4769                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4770         }
4771
4772         amdgpu_gfx_rlc_exit_safe_mode(adev);
4773 }
4774
4775 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4776                                             bool enable)
4777 {
4778         if (enable) {
4779                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4780                  * ===  MGCG + MGLS ===
4781                  */
4782                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4783                 /* ===  CGCG /CGLS for GFX 3D Only === */
4784                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4785                 /* ===  CGCG + CGLS === */
4786                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4787         } else {
4788                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4789                  * ===  CGCG + CGLS ===
4790                  */
4791                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4792                 /* ===  CGCG /CGLS for GFX 3D Only === */
4793                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4794                 /* ===  MGCG + MGLS === */
4795                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4796         }
4797         return 0;
4798 }
4799
4800 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4801         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4802         .set_safe_mode = gfx_v9_0_set_safe_mode,
4803         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4804         .init = gfx_v9_0_rlc_init,
4805         .get_csb_size = gfx_v9_0_get_csb_size,
4806         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4807         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4808         .resume = gfx_v9_0_rlc_resume,
4809         .stop = gfx_v9_0_rlc_stop,
4810         .reset = gfx_v9_0_rlc_reset,
4811         .start = gfx_v9_0_rlc_start
4812 };
4813
4814 static int gfx_v9_0_set_powergating_state(void *handle,
4815                                           enum amd_powergating_state state)
4816 {
4817         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4818         bool enable = (state == AMD_PG_STATE_GATE);
4819
4820         switch (adev->asic_type) {
4821         case CHIP_RAVEN:
4822         case CHIP_RENOIR:
4823                 if (!enable) {
4824                         amdgpu_gfx_off_ctrl(adev, false);
4825                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4826                 }
4827                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4828                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4829                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4830                 } else {
4831                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4832                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4833                 }
4834
4835                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4836                         gfx_v9_0_enable_cp_power_gating(adev, true);
4837                 else
4838                         gfx_v9_0_enable_cp_power_gating(adev, false);
4839
4840                 /* update gfx cgpg state */
4841                 if (is_support_sw_smu(adev) && enable)
4842                         smu_set_gfx_cgpg(&adev->smu, enable);
4843                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4844
4845                 /* update mgcg state */
4846                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4847
4848                 if (enable)
4849                         amdgpu_gfx_off_ctrl(adev, true);
4850                 break;
4851         case CHIP_VEGA12:
4852                 if (!enable) {
4853                         amdgpu_gfx_off_ctrl(adev, false);
4854                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4855                 } else {
4856                         amdgpu_gfx_off_ctrl(adev, true);
4857                 }
4858                 break;
4859         default:
4860                 break;
4861         }
4862
4863         return 0;
4864 }
4865
4866 static int gfx_v9_0_set_clockgating_state(void *handle,
4867                                           enum amd_clockgating_state state)
4868 {
4869         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4870
4871         if (amdgpu_sriov_vf(adev))
4872                 return 0;
4873
4874         switch (adev->asic_type) {
4875         case CHIP_VEGA10:
4876         case CHIP_VEGA12:
4877         case CHIP_VEGA20:
4878         case CHIP_RAVEN:
4879         case CHIP_ARCTURUS:
4880         case CHIP_RENOIR:
4881                 gfx_v9_0_update_gfx_clock_gating(adev,
4882                                                  state == AMD_CG_STATE_GATE);
4883                 break;
4884         default:
4885                 break;
4886         }
4887         return 0;
4888 }
4889
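/* Report which clock gating features are currently active by reading
 * back the RLC, CP and (except on Arcturus) 3D CGCG control registers
 * and translating their enable bits into AMD_CG_SUPPORT_* flags.
 */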
4890 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4891 {
4892         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4893         int data;
4894
4895         if (amdgpu_sriov_vf(adev))
4896                 *flags = 0;
4897
4898         /* AMD_CG_SUPPORT_GFX_MGCG */
4899         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4900         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4901                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4902
4903         /* AMD_CG_SUPPORT_GFX_CGCG */
4904         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4905         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4906                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4907
4908         /* AMD_CG_SUPPORT_GFX_CGLS */
4909         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4910                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4911
4912         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4913         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4914         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4915                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4916
4917         /* AMD_CG_SUPPORT_GFX_CP_LS */
4918         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4919         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4920                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4921
4922         if (adev->asic_type != CHIP_ARCTURUS) {
4923                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4924                 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4925                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4926                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4927
4928                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4929                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4930                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4931         }
4932 }
4933
4934 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4935 {
4936         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4937 }
4938
4939 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4940 {
4941         struct amdgpu_device *adev = ring->adev;
4942         u64 wptr;
4943
4944         /* XXX check if swapping is necessary on BE */
4945         if (ring->use_doorbell) {
4946                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4947         } else {
4948                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4949                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4950         }
4951
4952         return wptr;
4953 }
4954
4955 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4956 {
4957         struct amdgpu_device *adev = ring->adev;
4958
4959         if (ring->use_doorbell) {
4960                 /* XXX check if swapping is necessary on BE */
4961                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4962                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4963         } else {
4964                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4965                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4966         }
4967 }
4968
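/* Emit an HDP flush: write the ref value to the NBIO flush request
 * register and use WAIT_REG_MEM to poll the matching done bit for this
 * ring's CP engine and pipe.
 */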
4969 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4970 {
4971         struct amdgpu_device *adev = ring->adev;
4972         u32 ref_and_mask, reg_mem_engine;
4973         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4974
4975         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4976                 switch (ring->me) {
4977                 case 1:
4978                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4979                         break;
4980                 case 2:
4981                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4982                         break;
4983                 default:
4984                         return;
4985                 }
4986                 reg_mem_engine = 0;
4987         } else {
4988                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4989                 reg_mem_engine = 1; /* pfp */
4990         }
4991
4992         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4993                               adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4994                               adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4995                               ref_and_mask, ref_and_mask, 0x20);
4996 }
4997
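/* Emit an indirect buffer on the gfx ring. CE IBs use
 * INDIRECT_BUFFER_CONST, everything else a plain INDIRECT_BUFFER;
 * under SR-IOV, preemptible IBs additionally get the PRE_ENB bit and
 * DE metadata emitted in front of them.
 */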
4998 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4999                                         struct amdgpu_job *job,
5000                                         struct amdgpu_ib *ib,
5001                                         uint32_t flags)
5002 {
5003         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5004         u32 header, control = 0;
5005
5006         if (ib->flags & AMDGPU_IB_FLAG_CE)
5007                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5008         else
5009                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5010
5011         control |= ib->length_dw | (vmid << 24);
5012
5013         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5014                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5015
5016                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
5017                         gfx_v9_0_ring_emit_de_meta(ring);
5018         }
5019
5020         amdgpu_ring_write(ring, header);
5021         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5022         amdgpu_ring_write(ring,
5023 #ifdef __BIG_ENDIAN
5024                 (2 << 0) |
5025 #endif
5026                 lower_32_bits(ib->gpu_addr));
5027         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5028         amdgpu_ring_write(ring, control);
5029 }
5030
5031 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5032                                           struct amdgpu_job *job,
5033                                           struct amdgpu_ib *ib,
5034                                           uint32_t flags)
5035 {
5036         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5037         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5038
5039         /* Currently there is a high probability of a wave ID mismatch
5040          * between ME and GDS, leading to a HW deadlock, because ME generates
5041          * different wave IDs than the GDS expects. This situation happens
5042          * randomly when at least 5 compute pipes use GDS ordered append.
5043          * The wave IDs generated by ME are also wrong after suspend/resume.
5044          * Those are probably bugs somewhere else in the kernel driver.
5045          *
5046          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5047          * GDS to 0 for this ring (me/pipe).
5048          */
5049         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5050                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5051                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5052                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5053         }
5054
5055         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5056         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5057         amdgpu_ring_write(ring,
5058 #ifdef __BIG_ENDIAN
5059                                 (2 << 0) |
5060 #endif
5061                                 lower_32_bits(ib->gpu_addr));
5062         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5063         amdgpu_ring_write(ring, control);
5064 }
5065
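/* Emit a fence using RELEASE_MEM: flush/invalidate the TC caches (or
 * only write back TC for the TC_WB_ONLY case), then write the 32- or
 * 64-bit sequence number to addr and optionally raise an interrupt.
 */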
5066 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5067                                      u64 seq, unsigned flags)
5068 {
5069         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5070         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5071         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5072
5073         /* RELEASE_MEM - flush caches, send int */
5074         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5075         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5076                                                EOP_TC_NC_ACTION_EN) :
5077                                               (EOP_TCL1_ACTION_EN |
5078                                                EOP_TC_ACTION_EN |
5079                                                EOP_TC_WB_ACTION_EN |
5080                                                EOP_TC_MD_ACTION_EN)) |
5081                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5082                                  EVENT_INDEX(5)));
5083         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5084
5085         /*
5086          * the address should be Qword aligned for a 64bit write, and Dword
5087          * aligned if only the low 32bit data is sent (data high is discarded)
5088          */
5089         if (write64bit)
5090                 BUG_ON(addr & 0x7);
5091         else
5092                 BUG_ON(addr & 0x3);
5093         amdgpu_ring_write(ring, lower_32_bits(addr));
5094         amdgpu_ring_write(ring, upper_32_bits(addr));
5095         amdgpu_ring_write(ring, lower_32_bits(seq));
5096         amdgpu_ring_write(ring, upper_32_bits(seq));
5097         amdgpu_ring_write(ring, 0);
5098 }
5099
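/* Wait until the ring's last synced fence sequence number is visible
 * in the fence memory before continuing; on gfx rings the wait is
 * executed by the PFP, on compute rings by the ME.
 */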
5100 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5101 {
5102         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5103         uint32_t seq = ring->fence_drv.sync_seq;
5104         uint64_t addr = ring->fence_drv.gpu_addr;
5105
5106         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5107                               lower_32_bits(addr), upper_32_bits(addr),
5108                               seq, 0xffffffff, 4);
5109 }
5110
5111 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5112                                         unsigned vmid, uint64_t pd_addr)
5113 {
5114         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5115
5116         /* compute doesn't have PFP */
5117         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5118                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5119                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5120                 amdgpu_ring_write(ring, 0x0);
5121         }
5122 }
5123
5124 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5125 {
5126         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5127 }
5128
5129 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5130 {
5131         u64 wptr;
5132
5133         /* XXX check if swapping is necessary on BE */
5134         if (ring->use_doorbell)
5135                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5136         else
5137                 BUG();
5138         return wptr;
5139 }
5140
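/* Throttle or restore one pipe by programming its SPI_WCL_PIPE_PERCENT
 * register (indexed from the GFX one): the full VALUE mask when
 * acquire is set, a minimal 0x1 otherwise.
 */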
5141 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5142                                            bool acquire)
5143 {
5144         struct amdgpu_device *adev = ring->adev;
5145         int pipe_num, tmp, reg;
5146         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5147
5148         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5149
5150         /* first me only has 2 entries, GFX and HP3D */
5151         if (ring->me > 0)
5152                 pipe_num -= 2;
5153
5154         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5155         tmp = RREG32(reg);
5156         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5157         WREG32(reg, tmp);
5158 }
5159
5160 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5161                                             struct amdgpu_ring *ring,
5162                                             bool acquire)
5163 {
5164         int i, pipe;
5165         bool reserve;
5166         struct amdgpu_ring *iring;
5167
5168         mutex_lock(&adev->gfx.pipe_reserve_mutex);
5169         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5170         if (acquire)
5171                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5172         else
5173                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5174
5175         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5176                 /* Clear all reservations - everyone reacquires all resources */
5177                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5178                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5179                                                        true);
5180
5181                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5182                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5183                                                        true);
5184         } else {
5185                 /* Lower all pipes without a current reservation */
5186                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5187                         iring = &adev->gfx.gfx_ring[i];
5188                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5189                                                            iring->me,
5190                                                            iring->pipe,
5191                                                            0);
5192                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5193                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5194                 }
5195
5196                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5197                         iring = &adev->gfx.compute_ring[i];
5198                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5199                                                            iring->me,
5200                                                            iring->pipe,
5201                                                            0);
5202                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5203                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5204                 }
5205         }
5206
5207         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5208 }
5209
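/* Select the ring's queue via GRBM and program its HQD pipe/queue
 * priority registers: 0x2/0xf when acquiring high priority, 0x0/0x0
 * when releasing it.
 */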
5210 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5211                                       struct amdgpu_ring *ring,
5212                                       bool acquire)
5213 {
5214         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5215         uint32_t queue_priority = acquire ? 0xf : 0x0;
5216
5217         mutex_lock(&adev->srbm_mutex);
5218         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5219
5220         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5221         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5222
5223         soc15_grbm_select(adev, 0, 0, 0, 0);
5224         mutex_unlock(&adev->srbm_mutex);
5225 }
5226
5227 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5228                                                enum drm_sched_priority priority)
5229 {
5230         struct amdgpu_device *adev = ring->adev;
5231         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5232
5233         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5234                 return;
5235
5236         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5237         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5238 }
5239
5240 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5241 {
5242         struct amdgpu_device *adev = ring->adev;
5243
5244         /* XXX check if swapping is necessary on BE */
5245         if (ring->use_doorbell) {
5246                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5247                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5248         } else {
5249                 BUG(); /* only DOORBELL method supported on gfx9 now */
5250         }
5251 }
5252
5253 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5254                                          u64 seq, unsigned int flags)
5255 {
5256         struct amdgpu_device *adev = ring->adev;
5257
5258         /* we only allocate 32bit for each seq wb address */
5259         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5260
5261         /* write fence seq to the "addr" */
5262         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5263         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5264                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5265         amdgpu_ring_write(ring, lower_32_bits(addr));
5266         amdgpu_ring_write(ring, upper_32_bits(addr));
5267         amdgpu_ring_write(ring, lower_32_bits(seq));
5268
5269         if (flags & AMDGPU_FENCE_FLAG_INT) {
5270                 /* set register to trigger INT */
5271                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5272                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5273                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5274                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5275                 amdgpu_ring_write(ring, 0);
5276                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5277         }
5278 }
5279
5280 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5281 {
5282         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5283         amdgpu_ring_write(ring, 0);
5284 }
5285
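/* Write a zeroed CE metadata payload into the context save area's
 * ce_payload slot with WRITE_DATA on the CE engine; emitted only under
 * SR-IOV from gfx_v9_ring_emit_cntxcntl.
 */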
5286 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5287 {
5288         struct v9_ce_ib_state ce_payload = {0};
5289         uint64_t csa_addr;
5290         int cnt;
5291
5292         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5293         csa_addr = amdgpu_csa_vaddr(ring->adev);
5294
5295         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5296         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5297                                  WRITE_DATA_DST_SEL(8) |
5298                                  WR_CONFIRM) |
5299                                  WRITE_DATA_CACHE_POLICY(0));
5300         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5301         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5302         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5303 }
5304
5305 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5306 {
5307         struct v9_de_ib_state de_payload = {0};
5308         uint64_t csa_addr, gds_addr;
5309         int cnt;
5310
5311         csa_addr = amdgpu_csa_vaddr(ring->adev);
5312         gds_addr = csa_addr + 4096;
5313         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5314         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5315
5316         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5317         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5318         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5319                                  WRITE_DATA_DST_SEL(8) |
5320                                  WR_CONFIRM) |
5321                                  WRITE_DATA_CACHE_POLICY(0));
5322         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5323         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5324         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5325 }
5326
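/* Mark the start or end of a protected (TMZ) frame with a
 * FRAME_CONTROL packet (FRAME_CMD 0 = begin, 1 = end).
 */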
5327 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5328 {
5329         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5330         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5331 }
5332
5333 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5334 {
5335         uint32_t dw2 = 0;
5336
5337         if (amdgpu_sriov_vf(ring->adev))
5338                 gfx_v9_0_ring_emit_ce_meta(ring);
5339
5340         gfx_v9_0_ring_emit_tmz(ring, true);
5341
5342         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5343         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5344                 /* set load_global_config & load_global_uconfig */
5345                 dw2 |= 0x8001;
5346                 /* set load_cs_sh_regs */
5347                 dw2 |= 0x01000000;
5348                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5349                 dw2 |= 0x10002;
5350
5351                 /* set load_ce_ram if preamble presented */
5352                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5353                         dw2 |= 0x10000000;
5354         } else {
5355                 /* still load_ce_ram if this is the first time the preamble is
5356                  * presented, even though no context switch happens.
5357                  */
5358                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5359                         dw2 |= 0x10000000;
5360         }
5361
5362         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5363         amdgpu_ring_write(ring, dw2);
5364         amdgpu_ring_write(ring, 0);
5365 }
5366
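/* Emit a COND_EXEC packet with a dummy dword count; the returned ring
 * offset is later fixed up by gfx_v9_0_ring_emit_patch_cond_exec with
 * the real number of dwords to skip when *cond_exe_gpu_addr is zero.
 */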
5367 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5368 {
5369         unsigned ret;
5370         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5371         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5372         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5373         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5374         ret = ring->wptr & ring->buf_mask;
5375         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5376         return ret;
5377 }
5378
5379 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5380 {
5381         unsigned cur;
5382         BUG_ON(offset > ring->buf_mask);
5383         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5384
5385         cur = (ring->wptr & ring->buf_mask) - 1;
5386         if (likely(cur > offset))
5387                 ring->ring[offset] = cur - offset;
5388         else
5389                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5390 }
5391
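/* Emit a register read: COPY_DATA from the register into the
 * writeback slot reserved for VF register reads
 * (adev->virt.reg_val_offs).
 */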
5392 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5393 {
5394         struct amdgpu_device *adev = ring->adev;
5395
5396         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5397         amdgpu_ring_write(ring, 0 |     /* src: register */
5398                                 (5 << 8) |      /* dst: memory */
5399                                 (1 << 20));     /* write confirm */
5400         amdgpu_ring_write(ring, reg);
5401         amdgpu_ring_write(ring, 0);
5402         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5403                                 adev->virt.reg_val_offs * 4));
5404         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5405                                 adev->virt.reg_val_offs * 4));
5406 }
5407
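/* Emit a register write with WRITE_DATA, choosing the control bits by
 * ring type: PFP engine for gfx, no-address-increment for KIQ, plain
 * write-confirm otherwise.
 */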
5408 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5409                                     uint32_t val)
5410 {
5411         uint32_t cmd = 0;
5412
5413         switch (ring->funcs->type) {
5414         case AMDGPU_RING_TYPE_GFX:
5415                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5416                 break;
5417         case AMDGPU_RING_TYPE_KIQ:
5418                 cmd = (1 << 16); /* no inc addr */
5419                 break;
5420         default:
5421                 cmd = WR_CONFIRM;
5422                 break;
5423         }
5424         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5425         amdgpu_ring_write(ring, cmd);
5426         amdgpu_ring_write(ring, reg);
5427         amdgpu_ring_write(ring, 0);
5428         amdgpu_ring_write(ring, val);
5429 }
5430
5431 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5432                                         uint32_t val, uint32_t mask)
5433 {
5434         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5435 }
5436
5437 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5438                                                   uint32_t reg0, uint32_t reg1,
5439                                                   uint32_t ref, uint32_t mask)
5440 {
5441         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5442         struct amdgpu_device *adev = ring->adev;
5443         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5444                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5445
5446         if (fw_version_ok)
5447                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5448                                       ref, mask, 0x20);
5449         else
5450                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5451                                                            ref, mask);
5452 }
5453
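/* Soft recovery for a hung ring: issue an SQ_CMD kill with CHECK_VMID
 * set so only waves belonging to the offending job's VMID are
 * targeted.
 */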
5454 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5455 {
5456         struct amdgpu_device *adev = ring->adev;
5457         uint32_t value = 0;
5458
5459         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5460         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5461         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5462         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5463         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5464 }
5465
5466 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5467                                                  enum amdgpu_interrupt_state state)
5468 {
5469         switch (state) {
5470         case AMDGPU_IRQ_STATE_DISABLE:
5471         case AMDGPU_IRQ_STATE_ENABLE:
5472                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5473                                TIME_STAMP_INT_ENABLE,
5474                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5475                 break;
5476         default:
5477                 break;
5478         }
5479 }
5480
5481 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5482                                                      int me, int pipe,
5483                                                      enum amdgpu_interrupt_state state)
5484 {
5485         u32 mec_int_cntl, mec_int_cntl_reg;
5486
5487         /*
5488          * amdgpu controls only the first MEC. That's why this function only
5489          * handles the setting of interrupts for this specific MEC. All other
5490          * pipes' interrupts are set by amdkfd.
5491          */
5492
5493         if (me == 1) {
5494                 switch (pipe) {
5495                 case 0:
5496                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5497                         break;
5498                 case 1:
5499                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5500                         break;
5501                 case 2:
5502                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5503                         break;
5504                 case 3:
5505                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5506                         break;
5507                 default:
5508                         DRM_DEBUG("invalid pipe %d\n", pipe);
5509                         return;
5510                 }
5511         } else {
5512                 DRM_DEBUG("invalid me %d\n", me);
5513                 return;
5514         }
5515
5516         switch (state) {
5517         case AMDGPU_IRQ_STATE_DISABLE:
5518                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5519                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5520                                              TIME_STAMP_INT_ENABLE, 0);
5521                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5522                 break;
5523         case AMDGPU_IRQ_STATE_ENABLE:
5524                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5525                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5526                                              TIME_STAMP_INT_ENABLE, 1);
5527                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5528                 break;
5529         default:
5530                 break;
5531         }
5532 }
5533
5534 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5535                                              struct amdgpu_irq_src *source,
5536                                              unsigned type,
5537                                              enum amdgpu_interrupt_state state)
5538 {
5539         switch (state) {
5540         case AMDGPU_IRQ_STATE_DISABLE:
5541         case AMDGPU_IRQ_STATE_ENABLE:
5542                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5543                                PRIV_REG_INT_ENABLE,
5544                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5545                 break;
5546         default:
5547                 break;
5548         }
5549
5550         return 0;
5551 }
5552
5553 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5554                                               struct amdgpu_irq_src *source,
5555                                               unsigned type,
5556                                               enum amdgpu_interrupt_state state)
5557 {
5558         switch (state) {
5559         case AMDGPU_IRQ_STATE_DISABLE:
5560         case AMDGPU_IRQ_STATE_ENABLE:
5561                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5562                                PRIV_INSTR_INT_ENABLE,
5563                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
5564         default:
5565                 break;
5566         }
5567
5568         return 0;
5569 }
5570
5571 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5572         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5573                         CP_ECC_ERROR_INT_ENABLE, 1)
5574
5575 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5576         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5577                         CP_ECC_ERROR_INT_ENABLE, 0)
5578
5579 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5580                                               struct amdgpu_irq_src *source,
5581                                               unsigned type,
5582                                               enum amdgpu_interrupt_state state)
5583 {
5584         switch (state) {
5585         case AMDGPU_IRQ_STATE_DISABLE:
5586                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5587                                 CP_ECC_ERROR_INT_ENABLE, 0);
5588                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5589                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5590                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5591                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5592                 break;
5593
5594         case AMDGPU_IRQ_STATE_ENABLE:
5595                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5596                                 CP_ECC_ERROR_INT_ENABLE, 1);
5597                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5598                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5599                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5600                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5601                 break;
5602         default:
5603                 break;
5604         }
5605
5606         return 0;
5607 }
5608
5609
5610 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5611                                             struct amdgpu_irq_src *src,
5612                                             unsigned type,
5613                                             enum amdgpu_interrupt_state state)
5614 {
5615         switch (type) {
5616         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5617                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5618                 break;
5619         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5620                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5621                 break;
5622         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5623                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5624                 break;
5625         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5626                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5627                 break;
5628         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5629                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5630                 break;
5631         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5632                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5633                 break;
5634         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5635                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5636                 break;
5637         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5638                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5639                 break;
5640         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5641                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5642                 break;
5643         default:
5644                 break;
5645         }
5646         return 0;
5647 }
5648
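/* End-of-pipe interrupt handler: decode me/pipe/queue from the IV
 * ring_id and process fences on the matching gfx or compute ring.
 */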
5649 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5650                             struct amdgpu_irq_src *source,
5651                             struct amdgpu_iv_entry *entry)
5652 {
5653         int i;
5654         u8 me_id, pipe_id, queue_id;
5655         struct amdgpu_ring *ring;
5656
5657         DRM_DEBUG("IH: CP EOP\n");
5658         me_id = (entry->ring_id & 0x0c) >> 2;
5659         pipe_id = (entry->ring_id & 0x03) >> 0;
5660         queue_id = (entry->ring_id & 0x70) >> 4;
5661
5662         switch (me_id) {
5663         case 0:
5664                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5665                 break;
5666         case 1:
5667         case 2:
5668                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5669                         ring = &adev->gfx.compute_ring[i];
5670                         /* Per-queue interrupt is supported for MEC starting from VI.
5671                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5672                          */
5673                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5674                                 amdgpu_fence_process(ring);
5675                 }
5676                 break;
5677         }
5678         return 0;
5679 }
5680
5681 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5682                            struct amdgpu_iv_entry *entry)
5683 {
5684         u8 me_id, pipe_id, queue_id;
5685         struct amdgpu_ring *ring;
5686         int i;
5687
5688         me_id = (entry->ring_id & 0x0c) >> 2;
5689         pipe_id = (entry->ring_id & 0x03) >> 0;
5690         queue_id = (entry->ring_id & 0x70) >> 4;
5691
5692         switch (me_id) {
5693         case 0:
5694                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5695                 break;
5696         case 1:
5697         case 2:
5698                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5699                         ring = &adev->gfx.compute_ring[i];
5700                         if (ring->me == me_id && ring->pipe == pipe_id &&
5701                             ring->queue == queue_id)
5702                                 drm_sched_fault(&ring->sched);
5703                 }
5704                 break;
5705         }
5706 }
5707
5708 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5709                                  struct amdgpu_irq_src *source,
5710                                  struct amdgpu_iv_entry *entry)
5711 {
5712         DRM_ERROR("Illegal register access in command stream\n");
5713         gfx_v9_0_fault(adev, entry);
5714         return 0;
5715 }
5716
5717 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5718                                   struct amdgpu_irq_src *source,
5719                                   struct amdgpu_iv_entry *entry)
5720 {
5721         DRM_ERROR("Illegal instruction in command stream\n");
5722         gfx_v9_0_fault(adev, entry);
5723         return 0;
5724 }
5725
5726 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5727                 struct ras_err_data *err_data,
5728                 struct amdgpu_iv_entry *entry)
5729 {
5730         /* TODO: a UE (uncorrectable error) will trigger an interrupt. */
5731         kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5732         if (adev->gfx.funcs->query_ras_error_count)
5733                 adev->gfx.funcs->query_ras_error_count(adev, err_data);
5734         amdgpu_ras_reset_gpu(adev, 0);
5735         return AMDGPU_RAS_SUCCESS;
5736 }
5737
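/* Table of GFX EDC error counter registers used for RAS error count
 * queries: each entry names the register (SOC15 entry), whether it is
 * instanced per SE, how many instances to read, and the SEC and DED
 * count field masks (0 where the register has no DED field).
 */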
5738 static const struct {
5739         const char *name;
5740         uint32_t ip;
5741         uint32_t inst;
5742         uint32_t seg;
5743         uint32_t reg_offset;
5744         uint32_t per_se_instance;
5745         int32_t num_instance;
5746         uint32_t sec_count_mask;
5747         uint32_t ded_count_mask;
5748 } gfx_ras_edc_regs[] = {
5749         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
5750           REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5751           REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
5752         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
5753           REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
5754           REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
5755         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5756           REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
5757         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5758           REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
5759         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
5760           REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
5761           REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
5762         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5763           REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
5764         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5765           REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5766           REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
5767         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
5768           REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
5769           REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
5770         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
5771           REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
5772         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
5773           REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
5774         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
5775           REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
5776         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5777           REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
5778           REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
5779         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5780           REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
5781         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5782           0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5783           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
5784         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5785           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5786           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5787           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
5788         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5789           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5790           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
5791         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5792           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5793           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5794           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
5795         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5796           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5797           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5798           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
5799         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5800           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5801           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5802           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
5803         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5804           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5805           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5806           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
5807         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
5808           REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
5809         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5810           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5811           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
5812         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5813           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
5814         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5815           REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
5816         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5817           REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
5818         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5819           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
5820         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5821           REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
5822         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5823           REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
5824         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5825           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5826           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
5827         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5828           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5829           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
5830         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5831           REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5832           REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
5833         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5834           REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5835           REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
5836         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5837           REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5838           REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
5839         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5840           REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
5841         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5842           REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
5843         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5844           REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
5845         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5846           REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
5847         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5848           REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
5849         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5850           REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
5851         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5852           REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
5853         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5854           REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
5855         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5856           16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
5857         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5858           0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5859           0 },
5860         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5861           16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
5862         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5863           0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5864           0 },
5865         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5866           16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
5867         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
5868           REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
5869         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5870           REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5871           REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
5872         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5873           REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5874           REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
5875         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5876           REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
5877         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5878           REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
5879         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5880           REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
5881         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5882           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5883           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
5884         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5885           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5886           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
5887         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5888           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5889           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
5890         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5891           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5892           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
5893         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5894           REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
5895         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5896           REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5897           REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
5898         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5899           REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5900           REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
5901         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5902           REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
5903           REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
5904         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5905           REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5906           REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
5907         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5908           REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5909           REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
5910         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5911           REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5912           REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
5913         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5914           REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5915           REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
5916         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5917           1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5918           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
5919         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5920           6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5921           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
5922         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5923           1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5924           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
5925         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5926           6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5927           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
5928         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5929           1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5930           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
5931         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5932           6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5933           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
5934         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5935           6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5936           REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
5937         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5938           6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5939           REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
5940         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5941           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5942           REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
5943         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5944           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5945           REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
5946         { "SQC_INST_BANKA_UTCL1_MISS_FIFO",
5947           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5948           REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5949           0 },
5950         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5951           6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
5952         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5953           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
5954         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5955           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
5956         { "SQC_DATA_BANKA_DIRTY_BIT_RAM",
5957           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5958           REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
5959         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5960           REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5961           REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
5962         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5963           6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5964           REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
5965         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5966           6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5967           REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
5968         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5969           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5970           REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
5971         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5972           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5973           REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
5974         { "SQC_INST_BANKB_UTCL1_MISS_FIFO",
5975           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5976           REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5977           0 },
5978         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5979           6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
5980         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5981           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
5982         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5983           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
5984         { "SQC_DATA_BANKB_DIRTY_BIT_RAM",
5985           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5986           REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
5987         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5988           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5989           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
5990         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5991           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5992           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
5993         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5994           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5995           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
5996         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5997           REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5998           REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
5999         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6000           REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6001           REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
6002         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6003           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
6004         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6005           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
6006         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6007           REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
6008         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6009           REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
6010         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6011           REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
6012         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6013           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6014           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
6015         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6016           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6017           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
6018         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6019           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6020           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
6021         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6022           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
6023         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6024           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
6025         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6026           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
6027         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6028           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
6029         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6030           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
6031         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6032           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
6033 };
6034
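/*
 * Inject a RAS error into a GFX sub-block through the PSP.
 * Only Vega20 is supported; the requested sub-block and error type are
 * validated against the ras_gfx_subblocks table before the error is
 * triggered via psp_ras_trigger_error().
 */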
6035 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6036                                      void *inject_if)
6037 {
6038         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6039         int ret;
6040         struct ta_ras_trigger_error_input block_info = { 0 };
6041
6042         if (adev->asic_type != CHIP_VEGA20)
6043                 return -EINVAL;
6044
6045         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6046                 return -EINVAL;
6047
6048         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6049                 return -EPERM;
6050
6051         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6052               info->head.type)) {
6053                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6054                         ras_gfx_subblocks[info->head.sub_block_index].name,
6055                         info->head.type);
6056                 return -EPERM;
6057         }
6058
6059         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6060               info->head.type)) {
6061                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6062                         ras_gfx_subblocks[info->head.sub_block_index].name,
6063                         info->head.type);
6064                 return -EPERM;
6065         }
6066
6067         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6068         block_info.sub_block_index =
6069                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6070         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6071         block_info.address = info->address;
6072         block_info.value = info->value;
6073
6074         mutex_lock(&adev->grbm_idx_mutex);
6075         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6076         mutex_unlock(&adev->grbm_idx_mutex);
6077
6078         return ret;
6079 }
6080
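/*
 * Walk the gfx_ras_edc_regs table for every shader engine and instance
 * and accumulate the SEC (correctable) and DED (uncorrectable) EDC
 * counts into ras_error_status.  Only Vega20 is supported.
 */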
6081 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6082                                           void *ras_error_status)
6083 {
6084         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6085         uint32_t sec_count, ded_count;
6086         uint32_t i;
6087         uint32_t reg_value;
6088         uint32_t se_id, instance_id;
6089
6090         if (adev->asic_type != CHIP_VEGA20)
6091                 return -EINVAL;
6092
6093         err_data->ue_count = 0;
6094         err_data->ce_count = 0;
6095
6096         mutex_lock(&adev->grbm_idx_mutex);
6097         for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
6098                 for (instance_id = 0; instance_id < 256; instance_id++) {
6099                         for (i = 0;
6100                              i < ARRAY_SIZE(gfx_ras_edc_regs);
6101                              i++) {
6102                                 if (se_id != 0 &&
6103                                     !gfx_ras_edc_regs[i].per_se_instance)
6104                                         continue;
6105                                 if (instance_id >= gfx_ras_edc_regs[i].num_instance)
6106                                         continue;
6107
6108                                 gfx_v9_0_select_se_sh(adev, se_id, 0,
6109                                                       instance_id);
6110
6111                                 reg_value = RREG32(
6112                                         adev->reg_offset[gfx_ras_edc_regs[i].ip]
6113                                                         [gfx_ras_edc_regs[i].inst]
6114                                                         [gfx_ras_edc_regs[i].seg] +
6115                                         gfx_ras_edc_regs[i].reg_offset);
6116                                 sec_count = reg_value &
6117                                             gfx_ras_edc_regs[i].sec_count_mask;
6118                                 ded_count = reg_value &
6119                                             gfx_ras_edc_regs[i].ded_count_mask;
6120                                 if (sec_count) {
6121                                         DRM_INFO(
6122                                                 "Instance[%d][%d]: SubBlock %s, SEC %d\n",
6123                                                 se_id, instance_id,
6124                                                 gfx_ras_edc_regs[i].name,
6125                                                 sec_count);
6126                                         err_data->ce_count++;
6127                                 }
6128
6129                                 if (ded_count) {
6130                                         DRM_INFO(
6131                                                 "Instance[%d][%d]: SubBlock %s, DED %d\n",
6132                                                 se_id, instance_id,
6133                                                 gfx_ras_edc_regs[i].name,
6134                                                 ded_count);
6135                                         err_data->ue_count++;
6136                                 }
6137                         }
6138                 }
6139         }
6140         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6141         mutex_unlock(&adev->grbm_idx_mutex);
6142
6143         return 0;
6144 }
6145
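/* Forward a CP ECC error interrupt to the RAS interrupt dispatcher. */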
6146 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6147                                   struct amdgpu_irq_src *source,
6148                                   struct amdgpu_iv_entry *entry)
6149 {
6150         struct ras_common_if *ras_if = adev->gfx.ras_if;
6151         struct ras_dispatch_if ih_data = {
6152                 .entry = entry,
6153         };
6154
6155         if (!ras_if)
6156                 return 0;
6157
6158         ih_data.head = *ras_if;
6159
6160         DRM_ERROR("CP ECC ERROR IRQ\n");
6161         amdgpu_ras_interrupt_dispatch(adev, &ih_data);
6162         return 0;
6163 }
6164
6165 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6166         .name = "gfx_v9_0",
6167         .early_init = gfx_v9_0_early_init,
6168         .late_init = gfx_v9_0_late_init,
6169         .sw_init = gfx_v9_0_sw_init,
6170         .sw_fini = gfx_v9_0_sw_fini,
6171         .hw_init = gfx_v9_0_hw_init,
6172         .hw_fini = gfx_v9_0_hw_fini,
6173         .suspend = gfx_v9_0_suspend,
6174         .resume = gfx_v9_0_resume,
6175         .is_idle = gfx_v9_0_is_idle,
6176         .wait_for_idle = gfx_v9_0_wait_for_idle,
6177         .soft_reset = gfx_v9_0_soft_reset,
6178         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6179         .set_powergating_state = gfx_v9_0_set_powergating_state,
6180         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6181 };
6182
6183 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6184         .type = AMDGPU_RING_TYPE_GFX,
6185         .align_mask = 0xff,
6186         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6187         .support_64bit_ptrs = true,
6188         .vmhub = AMDGPU_GFXHUB_0,
6189         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6190         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6191         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6192         .emit_frame_size = /* 242 maximum in total, if 16 IBs */
6193                 5 +  /* COND_EXEC */
6194                 7 +  /* PIPELINE_SYNC */
6195                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6196                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6197                 2 + /* VM_FLUSH */
6198                 8 +  /* FENCE for VM_FLUSH */
6199                 20 + /* GDS switch */
6200                 4 + /* double SWITCH_BUFFER,
6201                        the first COND_EXEC jumps to the place just
6202                        prior to this double SWITCH_BUFFER */
6203                 5 + /* COND_EXEC */
6204                 7 + /* HDP_flush */
6205                 4 + /* VGT_flush */
6206                 14 + /* CE_META */
6207                 31 + /* DE_META */
6208                 3 + /* CNTX_CTRL */
6209                 5 + /* HDP_INVL */
6210                 8 + 8 + /* FENCE x2 */
6211                 2, /* SWITCH_BUFFER */
6212         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6213         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6214         .emit_fence = gfx_v9_0_ring_emit_fence,
6215         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6216         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6217         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6218         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6219         .test_ring = gfx_v9_0_ring_test_ring,
6220         .test_ib = gfx_v9_0_ring_test_ib,
6221         .insert_nop = amdgpu_ring_insert_nop,
6222         .pad_ib = amdgpu_ring_generic_pad_ib,
6223         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6224         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6225         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6226         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6227         .emit_tmz = gfx_v9_0_ring_emit_tmz,
6228         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6229         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6230         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6231         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6232 };
6233
6234 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6235         .type = AMDGPU_RING_TYPE_COMPUTE,
6236         .align_mask = 0xff,
6237         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6238         .support_64bit_ptrs = true,
6239         .vmhub = AMDGPU_GFXHUB_0,
6240         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6241         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6242         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6243         .emit_frame_size =
6244                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6245                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6246                 5 + /* hdp invalidate */
6247                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6248                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6249                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6250                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6251                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6252         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6253         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6254         .emit_fence = gfx_v9_0_ring_emit_fence,
6255         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6256         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6257         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6258         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6259         .test_ring = gfx_v9_0_ring_test_ring,
6260         .test_ib = gfx_v9_0_ring_test_ib,
6261         .insert_nop = amdgpu_ring_insert_nop,
6262         .pad_ib = amdgpu_ring_generic_pad_ib,
6263         .set_priority = gfx_v9_0_ring_set_priority_compute,
6264         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6265         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6266         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6267 };
6268
6269 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6270         .type = AMDGPU_RING_TYPE_KIQ,
6271         .align_mask = 0xff,
6272         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6273         .support_64bit_ptrs = true,
6274         .vmhub = AMDGPU_GFXHUB_0,
6275         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6276         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6277         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6278         .emit_frame_size =
6279                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6280                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6281                 5 + /* hdp invalidate */
6282                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6283                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6284                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6285                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6286                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6287         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6288         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6289         .test_ring = gfx_v9_0_ring_test_ring,
6290         .insert_nop = amdgpu_ring_insert_nop,
6291         .pad_ib = amdgpu_ring_generic_pad_ib,
6292         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6293         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6294         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6295         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6296 };
6297
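/* Hook up the KIQ, gfx and compute ring function tables. */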
6298 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6299 {
6300         int i;
6301
6302         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6303
6304         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6305                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6306
6307         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6308                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6309 }
6310
6311 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6312         .set = gfx_v9_0_set_eop_interrupt_state,
6313         .process = gfx_v9_0_eop_irq,
6314 };
6315
6316 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6317         .set = gfx_v9_0_set_priv_reg_fault_state,
6318         .process = gfx_v9_0_priv_reg_irq,
6319 };
6320
6321 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6322         .set = gfx_v9_0_set_priv_inst_fault_state,
6323         .process = gfx_v9_0_priv_inst_irq,
6324 };
6325
6326 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6327         .set = gfx_v9_0_set_cp_ecc_error_state,
6328         .process = gfx_v9_0_cp_ecc_error_irq,
6329 };
6330
6331
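/* Register the interrupt sources handled by the GFX block. */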
6332 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6333 {
6334         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6335         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6336
6337         adev->gfx.priv_reg_irq.num_types = 1;
6338         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6339
6340         adev->gfx.priv_inst_irq.num_types = 1;
6341         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6342
6343         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6344         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6345 }
6346
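/* All supported gfx9 ASICs use the common gfx_v9_0_rlc_funcs table. */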
6347 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6348 {
6349         switch (adev->asic_type) {
6350         case CHIP_VEGA10:
6351         case CHIP_VEGA12:
6352         case CHIP_VEGA20:
6353         case CHIP_RAVEN:
6354         case CHIP_ARCTURUS:
6355         case CHIP_RENOIR:
6356                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6357                 break;
6358         default:
6359                 break;
6360         }
6361 }
6362
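/* Set the per-ASIC GDS/GWS/OA sizes and the GDS compute max wave id. */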
6363 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6364 {
6365         /* init asic gds info */
6366         switch (adev->asic_type) {
6367         case CHIP_VEGA10:
6368         case CHIP_VEGA12:
6369         case CHIP_VEGA20:
6370                 adev->gds.gds_size = 0x10000;
6371                 break;
6372         case CHIP_RAVEN:
6373         case CHIP_ARCTURUS:
6374                 adev->gds.gds_size = 0x1000;
6375                 break;
6376         default:
6377                 adev->gds.gds_size = 0x10000;
6378                 break;
6379         }
6380
6381         switch (adev->asic_type) {
6382         case CHIP_VEGA10:
6383         case CHIP_VEGA20:
6384                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6385                 break;
6386         case CHIP_VEGA12:
6387                 adev->gds.gds_compute_max_wave_id = 0x27f;
6388                 break;
6389         case CHIP_RAVEN:
6390                 if (adev->rev_id >= 0x8)
6391                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6392                 else
6393                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6394                 break;
6395         case CHIP_ARCTURUS:
6396                 adev->gds.gds_compute_max_wave_id = 0xfff;
6397                 break;
6398         default:
6399                 /* this really depends on the chip */
6400                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6401                 break;
6402         }
6403
6404         adev->gds.gws_size = 64;
6405         adev->gds.oa_size = 16;
6406 }
6407
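/*
 * Write the user-specified inactive CU bitmap for the currently
 * selected SE/SH into GC_USER_SHADER_ARRAY_CONFIG.
 */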
6408 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6409                                                  u32 bitmap)
6410 {
6411         u32 data;
6412
6413         if (!bitmap)
6414                 return;
6415
6416         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6417         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6418
6419         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6420 }
6421
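/*
 * Combine the fused (CC_GC_SHADER_ARRAY_CONFIG) and user-disabled
 * (GC_USER_SHADER_ARRAY_CONFIG) CU masks and return the bitmap of CUs
 * that remain active in the currently selected SE/SH.
 */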
6422 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6423 {
6424         u32 data, mask;
6425
6426         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6427         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6428
6429         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6430         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6431
6432         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6433
6434         return (~data) & mask;
6435 }
6436
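/*
 * Fill cu_info with the per-SE/SH active CU bitmaps, the total active
 * CU count, the always-on CU mask and the number of SIMDs per CU.
 */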
6437 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6438                                  struct amdgpu_cu_info *cu_info)
6439 {
6440         int i, j, k, counter, active_cu_number = 0;
6441         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6442         unsigned disable_masks[4 * 4];
6443
6444         if (!adev || !cu_info)
6445                 return -EINVAL;
6446
6447         /*
6448          * 16 is the size of the 4x4 bitmap array, which covers all gfx9 ASICs
6449          */
6450         if (adev->gfx.config.max_shader_engines *
6451                 adev->gfx.config.max_sh_per_se > 16)
6452                 return -EINVAL;
6453
6454         amdgpu_gfx_parse_disable_cu(disable_masks,
6455                                     adev->gfx.config.max_shader_engines,
6456                                     adev->gfx.config.max_sh_per_se);
6457
6458         mutex_lock(&adev->grbm_idx_mutex);
6459         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6460                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6461                         mask = 1;
6462                         ao_bitmap = 0;
6463                         counter = 0;
6464                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6465                         gfx_v9_0_set_user_cu_inactive_bitmap(
6466                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6467                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6468
6469                         /*
6470                          * The bitmap (and ao_cu_bitmap) in the cu_info structure
6471                          * is a 4x4 array, which suits Vega ASICs with their
6472                          * 4*2 SE/SH layout.
6473                          * Arcturus, however, uses an 8*1 SE/SH layout.
6474                          * To minimize the impact, we make it compatible with
6475                          * the current bitmap array as below:
6476                          *    SE4,SH0 --> bitmap[0][1]
6477                          *    SE5,SH0 --> bitmap[1][1]
6478                          *    SE6,SH0 --> bitmap[2][1]
6479                          *    SE7,SH0 --> bitmap[3][1]
6480                          */
6481                         cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6482
6483                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6484                                 if (bitmap & mask) {
6485                                         if (counter < adev->gfx.config.max_cu_per_sh)
6486                                                 ao_bitmap |= mask;
6487                                         counter++;
6488                                 }
6489                                 mask <<= 1;
6490                         }
6491                         active_cu_number += counter;
6492                         if (i < 2 && j < 2)
6493                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6494                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6495                 }
6496         }
6497         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6498         mutex_unlock(&adev->grbm_idx_mutex);
6499
6500         cu_info->number = active_cu_number;
6501         cu_info->ao_cu_mask = ao_cu_mask;
6502         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6503
6504         return 0;
6505 }
6506
6507 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6508 {
6509         .type = AMD_IP_BLOCK_TYPE_GFX,
6510         .major = 9,
6511         .minor = 0,
6512         .rev = 0,
6513         .funcs = &gfx_v9_0_ip_funcs,
6514 };