drm/amdgpu: Add parsing SQ_EDC_INFO to SQ IH v3.
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "vi.h"
29 #include "vi_structs.h"
30 #include "vid.h"
31 #include "amdgpu_ucode.h"
32 #include "amdgpu_atombios.h"
33 #include "atombios_i2c.h"
34 #include "clearstate_vi.h"
35
36 #include "gmc/gmc_8_2_d.h"
37 #include "gmc/gmc_8_2_sh_mask.h"
38
39 #include "oss/oss_3_0_d.h"
40 #include "oss/oss_3_0_sh_mask.h"
41
42 #include "bif/bif_5_0_d.h"
43 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
/* GFX v8 uses a single GFX ring. */
#define GFX8_NUM_GFX_RINGS     1
/* Per-pipe MEC hardware queue descriptor (HPD/HQD) area size, in bytes. */
#define GFX8_MEC_HPD_SIZE 2048

/*
 * Per-ASIC "golden" GB_ADDR_CONFIG values (memory/pipe addressing config).
 * NOTE(review): Topaz and Carrizo share the same value by design, not by
 * accident — keep them separate defines per ASIC.
 */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/*
 * Helpers to build GB_TILE_MODE* / GB_MACROTILE_MODE* register values by
 * shifting each field into place (shift amounts come from gfx_8_0_sh_mask.h).
 */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-block clockgating override bits within RLC_CGTT_MGCG_OVERRIDE. */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set (1) or clear (0) the selected BPM register. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address — indices of the per-CU BPM registers reachable
 * over the SERDES interface. */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

/* RLC firmware format id for a direct register list length field.
 * NOTE(review): meaning inferred from the name only — confirm against the
 * RLC firmware header definitions. */
#define RLC_FormatDirectRegListLength        14
94
/*
 * Firmware images required by this IP block, declared per supported ASIC so
 * module tooling (e.g. initramfs generators) can pick them up.  The "_2"
 * variants for Polaris are alternate firmware revisions loaded on newer
 * board/firmware combinations.
 */

/* Carrizo (APU) */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

/* Stoney (APU) — no second MEC */
MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

/* Tonga */
MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

/* Topaz / Iceland — no second MEC */
MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

/* Fiji */
MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

/* Polaris10 (and "_2" firmware revisions) */
MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

/* Polaris11 (and "_2" firmware revisions) */
MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

/* Polaris12 (and "_2" firmware revisions) */
MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

/* VegaM */
MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
170
/*
 * Per-VMID GDS register offsets, indexed by VMID (0-15).  Each row holds the
 * {GDS base, GDS size, GWS, OA} register offsets for one VMID; the row order
 * must stay exactly VMID0..VMID15 since code indexes this table by VMID.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
190
/*
 * Tonga A11 golden register settings.  Flat triplets of
 * {register offset, AND mask, OR value} — presumably applied via
 * amdgpu_device_program_register_sequence(); confirm at the call site.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
210
/*
 * Tonga common golden settings ({register, AND mask, OR value} triplets).
 * The leading GRBM_GFX_INDEX write selects broadcast indexing so the
 * following writes hit all shader engines/instances.
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
222
/*
 * Tonga MGCG/CGCG (medium-/coarse-grain clock gating) init sequence.
 * {register, AND mask, OR value} triplets; order matters — overrides are
 * lifted first, then per-block CGTT clock controls, then per-CU CGTS
 * registers (note CU0/CU4 use the TA_SQC variant, the others TA).
 * Do not reorder entries.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU CGTS programming, CU0..CU7 */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
301
/* VegaM A11 golden register settings ({register, AND mask, OR value}). */
static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
322
/* VegaM common golden settings; GRBM_GFX_INDEX broadcast write first. */
static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
332
/* Polaris11 A11 golden register settings ({register, AND mask, OR value}). */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
353
/* Polaris11 common golden settings; note GB_ADDR_CONFIG matches
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002). */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
363
/* Polaris10 A11 golden register settings ({register, AND mask, OR value}). */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
384
/* Polaris10 common golden settings; GRBM_GFX_INDEX broadcast write first. */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
396
/* Fiji common golden settings; GRBM_GFX_INDEX is re-written to broadcast
 * before the final SPI_CONFIG_CNTL_1 entry. */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
410
/* Fiji A10 golden register settings ({register, AND mask, OR value}). */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
425
/*
 * Fiji MGCG/CGCG init sequence ({register, AND mask, OR value} triplets).
 * Same structure as the Tonga table but without the per-CU CGTS entries.
 * Order matters; do not reorder entries.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
464
/* Iceland (Topaz) A11 golden register settings ({register, AND mask, OR value}). */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
484
/* Iceland common golden settings; GB_ADDR_CONFIG matches
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001). */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
496
/*
 * Iceland MGCG/CGCG init sequence ({register, AND mask, OR value} triplets).
 * Covers CU0..CU5 only (smaller part); CU0/CU4 use the TA_SQC variant with
 * 0x0f840f87 rather than the 0x00040007 used elsewhere.  Order matters.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU CGTS programming, CU0..CU5 */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
564
/* Carrizo A11 golden register settings ({register, AND mask, OR value}). */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
580
/* Carrizo common golden settings; GB_ADDR_CONFIG matches
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001). */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
592
/*
 * Carrizo MGCG/CGCG init sequence ({register, AND mask, OR value} triplets).
 * Covers CU0..CU7; CU0/CU4 use the TA_SQC register variant.  Note the final
 * RLC_CGCG_CGLS_CTRL value is 0x0020003f here (vs 0x0020003c on the dGPUs).
 * Order matters; do not reorder entries.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU CGTS programming, CU0..CU7 */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
671
/* Stoney tuned ("golden") register settings: flat list of
 * (register offset, AND mask, OR value) triples, applied by
 * amdgpu_device_program_register_sequence() at init time. */
static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
685
/* Stoney common golden registers (raster config, addressing, SPI CU
 * reservations); same (offset, mask, value) triple format as above. */
static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
697
/* Stoney clock-gating init defaults (RLC CGCG/CGLS, CP/RLC memory
 * light-sleep, CGTS); (offset, mask, value) triples. */
static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
706
707
/* Human-readable descriptions of the SQ_EDC_INFO SOURCE field values,
 * presumably indexed by the raw SOURCE encoding (0 = no error .. 6 = TA)
 * — confirm against the SQ interrupt handler that parses SQ_EDC_INFO. */
static const char * const sq_edc_source_names[] = {
        "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
        "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
        "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
        "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
        "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
        "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
        "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
717
/* Forward declarations for static helpers defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
726
/*
 * gfx_v8_0_init_golden_registers - apply per-ASIC tuned register settings
 * @adev: amdgpu device
 *
 * Selects the clock-gating init, tuned-settings and/or common golden
 * register lists for the detected ASIC and programs them via
 * amdgpu_device_program_register_sequence().  Unknown ASICs are a no-op.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_mgcg_cgcg_init,
                                                        ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_iceland_a11,
                                                        ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_golden_common_all,
                                                        ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_mgcg_cgcg_init,
                                                        ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_fiji_a10,
                                                        ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_golden_common_all,
                                                        ARRAY_SIZE(fiji_golden_common_all));
                break;

        case CHIP_TONGA:
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_mgcg_cgcg_init,
                                                        ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_tonga_a11,
                                                        ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_golden_common_all,
                                                        ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_VEGAM:
                /* VegaM has no separate mgcg/cgcg init list here */
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_vegam_a11,
                                                        ARRAY_SIZE(golden_settings_vegam_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        vegam_golden_common_all,
                                                        ARRAY_SIZE(vegam_golden_common_all));
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                /* Polaris11 and Polaris12 share the same golden lists */
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris11_a11,
                                                        ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris11_golden_common_all,
                                                        ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris10_a11,
                                                        ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris10_golden_common_all,
                                                        ARRAY_SIZE(polaris10_golden_common_all));
                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
                /* Board-specific quirk for certain Polaris10 SKUs (matched by
                 * PCI revision + subsystem IDs): extra ATOM i2c channel writes.
                 * NOTE(review): exact purpose not visible here — presumably a
                 * vendor board workaround; confirm against board docs. */
                if (adev->pdev->revision == 0xc7 &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_device_program_register_sequence(adev,
                                                        cz_mgcg_cgcg_init,
                                                        ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_settings_a11,
                                                        ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_common_all,
                                                        ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_mgcg_cgcg_init,
                                                        ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_settings_a11,
                                                        ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_common_all,
                                                        ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}
823
824 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
825 {
826         adev->gfx.scratch.num_reg = 8;
827         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
828         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
829 }
830
831 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
832 {
833         struct amdgpu_device *adev = ring->adev;
834         uint32_t scratch;
835         uint32_t tmp = 0;
836         unsigned i;
837         int r;
838
839         r = amdgpu_gfx_scratch_get(adev, &scratch);
840         if (r) {
841                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
842                 return r;
843         }
844         WREG32(scratch, 0xCAFEDEAD);
845         r = amdgpu_ring_alloc(ring, 3);
846         if (r) {
847                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
848                           ring->idx, r);
849                 amdgpu_gfx_scratch_free(adev, scratch);
850                 return r;
851         }
852         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
853         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
854         amdgpu_ring_write(ring, 0xDEADBEEF);
855         amdgpu_ring_commit(ring);
856
857         for (i = 0; i < adev->usec_timeout; i++) {
858                 tmp = RREG32(scratch);
859                 if (tmp == 0xDEADBEEF)
860                         break;
861                 DRM_UDELAY(1);
862         }
863         if (i < adev->usec_timeout) {
864                 DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
865                          ring->idx, i);
866         } else {
867                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
868                           ring->idx, scratch, tmp);
869                 r = -EINVAL;
870         }
871         amdgpu_gfx_scratch_free(adev, scratch);
872         return r;
873 }
874
875 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
876 {
877         struct amdgpu_device *adev = ring->adev;
878         struct amdgpu_ib ib;
879         struct dma_fence *f = NULL;
880
881         unsigned int index;
882         uint64_t gpu_addr;
883         uint32_t tmp;
884         long r;
885
886         r = amdgpu_device_wb_get(adev, &index);
887         if (r) {
888                 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
889                 return r;
890         }
891
892         gpu_addr = adev->wb.gpu_addr + (index * 4);
893         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
894         memset(&ib, 0, sizeof(ib));
895         r = amdgpu_ib_get(adev, NULL, 16, &ib);
896         if (r) {
897                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
898                 goto err1;
899         }
900         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
901         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
902         ib.ptr[2] = lower_32_bits(gpu_addr);
903         ib.ptr[3] = upper_32_bits(gpu_addr);
904         ib.ptr[4] = 0xDEADBEEF;
905         ib.length_dw = 5;
906
907         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
908         if (r)
909                 goto err2;
910
911         r = dma_fence_wait_timeout(f, false, timeout);
912         if (r == 0) {
913                 DRM_ERROR("amdgpu: IB test timed out.\n");
914                 r = -ETIMEDOUT;
915                 goto err2;
916         } else if (r < 0) {
917                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
918                 goto err2;
919         }
920
921         tmp = adev->wb.wb[index];
922         if (tmp == 0xDEADBEEF) {
923                 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
924                 r = 0;
925         } else {
926                 DRM_ERROR("ib test on ring %d failed\n", ring->idx);
927                 r = -EINVAL;
928         }
929
930 err2:
931         amdgpu_ib_free(adev, &ib, NULL);
932         dma_fence_put(f);
933 err1:
934         amdgpu_device_wb_free(adev, index);
935         return r;
936 }
937
938
/*
 * gfx_v8_0_free_microcode - release all GFX firmware references
 * @adev: amdgpu device
 *
 * Drops every firmware image acquired by gfx_v8_0_init_microcode() and
 * frees the RLC register-list allocation.
 */
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        /* MEC2 firmware is only loaded on ASICs other than Stoney/Topaz;
         * the pointer is cleared unconditionally (NULL on those chips). */
        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ))
                release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}
958
959 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
960 {
961         const char *chip_name;
962         char fw_name[30];
963         int err;
964         struct amdgpu_firmware_info *info = NULL;
965         const struct common_firmware_header *header = NULL;
966         const struct gfx_firmware_header_v1_0 *cp_hdr;
967         const struct rlc_firmware_header_v2_0 *rlc_hdr;
968         unsigned int *tmp = NULL, i;
969
970         DRM_DEBUG("\n");
971
972         switch (adev->asic_type) {
973         case CHIP_TOPAZ:
974                 chip_name = "topaz";
975                 break;
976         case CHIP_TONGA:
977                 chip_name = "tonga";
978                 break;
979         case CHIP_CARRIZO:
980                 chip_name = "carrizo";
981                 break;
982         case CHIP_FIJI:
983                 chip_name = "fiji";
984                 break;
985         case CHIP_STONEY:
986                 chip_name = "stoney";
987                 break;
988         case CHIP_POLARIS10:
989                 chip_name = "polaris10";
990                 break;
991         case CHIP_POLARIS11:
992                 chip_name = "polaris11";
993                 break;
994         case CHIP_POLARIS12:
995                 chip_name = "polaris12";
996                 break;
997         case CHIP_VEGAM:
998                 chip_name = "vegam";
999                 break;
1000         default:
1001                 BUG();
1002         }
1003
1004         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1005                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
1006                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1007                 if (err == -ENOENT) {
1008                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1009                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1010                 }
1011         } else {
1012                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1013                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1014         }
1015         if (err)
1016                 goto out;
1017         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1018         if (err)
1019                 goto out;
1020         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1021         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1022         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1023
1024         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1025                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1026                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1027                 if (err == -ENOENT) {
1028                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1029                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1030                 }
1031         } else {
1032                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1033                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1034         }
1035         if (err)
1036                 goto out;
1037         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1038         if (err)
1039                 goto out;
1040         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1041         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1042
1043         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1044
1045         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1046                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1047                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1048                 if (err == -ENOENT) {
1049                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1050                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1051                 }
1052         } else {
1053                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1054                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1055         }
1056         if (err)
1057                 goto out;
1058         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1059         if (err)
1060                 goto out;
1061         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1062         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1063         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1064
1065         /*
1066          * Support for MCBP/Virtualization in combination with chained IBs is
1067          * formal released on feature version #46
1068          */
1069         if (adev->gfx.ce_feature_version >= 46 &&
1070             adev->gfx.pfp_feature_version >= 46) {
1071                 adev->virt.chained_ib_support = true;
1072                 DRM_INFO("Chained IB support enabled!\n");
1073         } else
1074                 adev->virt.chained_ib_support = false;
1075
1076         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1077         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1078         if (err)
1079                 goto out;
1080         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1081         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1082         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1083         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1084
1085         adev->gfx.rlc.save_and_restore_offset =
1086                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1087         adev->gfx.rlc.clear_state_descriptor_offset =
1088                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1089         adev->gfx.rlc.avail_scratch_ram_locations =
1090                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1091         adev->gfx.rlc.reg_restore_list_size =
1092                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1093         adev->gfx.rlc.reg_list_format_start =
1094                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1095         adev->gfx.rlc.reg_list_format_separate_start =
1096                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1097         adev->gfx.rlc.starting_offsets_start =
1098                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1099         adev->gfx.rlc.reg_list_format_size_bytes =
1100                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1101         adev->gfx.rlc.reg_list_size_bytes =
1102                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1103
1104         adev->gfx.rlc.register_list_format =
1105                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1106                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1107
1108         if (!adev->gfx.rlc.register_list_format) {
1109                 err = -ENOMEM;
1110                 goto out;
1111         }
1112
1113         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1114                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1115         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1116                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1117
1118         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1119
1120         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1121                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1122         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1123                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1124
1125         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1126                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1127                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1128                 if (err == -ENOENT) {
1129                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1130                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1131                 }
1132         } else {
1133                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1134                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1135         }
1136         if (err)
1137                 goto out;
1138         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1139         if (err)
1140                 goto out;
1141         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1142         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1143         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1144
1145         if ((adev->asic_type != CHIP_STONEY) &&
1146             (adev->asic_type != CHIP_TOPAZ)) {
1147                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1148                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1149                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1150                         if (err == -ENOENT) {
1151                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1152                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1153                         }
1154                 } else {
1155                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1156                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1157                 }
1158                 if (!err) {
1159                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1160                         if (err)
1161                                 goto out;
1162                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1163                                 adev->gfx.mec2_fw->data;
1164                         adev->gfx.mec2_fw_version =
1165                                 le32_to_cpu(cp_hdr->header.ucode_version);
1166                         adev->gfx.mec2_feature_version =
1167                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1168                 } else {
1169                         err = 0;
1170                         adev->gfx.mec2_fw = NULL;
1171                 }
1172         }
1173
1174         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1175                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1176                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1177                 info->fw = adev->gfx.pfp_fw;
1178                 header = (const struct common_firmware_header *)info->fw->data;
1179                 adev->firmware.fw_size +=
1180                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1181
1182                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1183                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1184                 info->fw = adev->gfx.me_fw;
1185                 header = (const struct common_firmware_header *)info->fw->data;
1186                 adev->firmware.fw_size +=
1187                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1188
1189                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1190                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1191                 info->fw = adev->gfx.ce_fw;
1192                 header = (const struct common_firmware_header *)info->fw->data;
1193                 adev->firmware.fw_size +=
1194                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1195
1196                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1197                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1198                 info->fw = adev->gfx.rlc_fw;
1199                 header = (const struct common_firmware_header *)info->fw->data;
1200                 adev->firmware.fw_size +=
1201                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1202
1203                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1204                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1205                 info->fw = adev->gfx.mec_fw;
1206                 header = (const struct common_firmware_header *)info->fw->data;
1207                 adev->firmware.fw_size +=
1208                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1209
1210                 /* we need account JT in */
1211                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1212                 adev->firmware.fw_size +=
1213                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1214
1215                 if (amdgpu_sriov_vf(adev)) {
1216                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1217                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1218                         info->fw = adev->gfx.mec_fw;
1219                         adev->firmware.fw_size +=
1220                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1221                 }
1222
1223                 if (adev->gfx.mec2_fw) {
1224                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1225                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1226                         info->fw = adev->gfx.mec2_fw;
1227                         header = (const struct common_firmware_header *)info->fw->data;
1228                         adev->firmware.fw_size +=
1229                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1230                 }
1231
1232         }
1233
1234 out:
1235         if (err) {
1236                 dev_err(adev->dev,
1237                         "gfx8: Failed to load firmware \"%s\"\n",
1238                         fw_name);
1239                 release_firmware(adev->gfx.pfp_fw);
1240                 adev->gfx.pfp_fw = NULL;
1241                 release_firmware(adev->gfx.me_fw);
1242                 adev->gfx.me_fw = NULL;
1243                 release_firmware(adev->gfx.ce_fw);
1244                 adev->gfx.ce_fw = NULL;
1245                 release_firmware(adev->gfx.rlc_fw);
1246                 adev->gfx.rlc_fw = NULL;
1247                 release_firmware(adev->gfx.mec_fw);
1248                 adev->gfx.mec_fw = NULL;
1249                 release_firmware(adev->gfx.mec2_fw);
1250                 adev->gfx.mec2_fw = NULL;
1251         }
1252         return err;
1253 }
1254
1255 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1256                                     volatile u32 *buffer)
1257 {
1258         u32 count = 0, i;
1259         const struct cs_section_def *sect = NULL;
1260         const struct cs_extent_def *ext = NULL;
1261
1262         if (adev->gfx.rlc.cs_data == NULL)
1263                 return;
1264         if (buffer == NULL)
1265                 return;
1266
1267         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1268         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1269
1270         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1271         buffer[count++] = cpu_to_le32(0x80000000);
1272         buffer[count++] = cpu_to_le32(0x80000000);
1273
1274         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1275                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1276                         if (sect->id == SECT_CONTEXT) {
1277                                 buffer[count++] =
1278                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1279                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1280                                                 PACKET3_SET_CONTEXT_REG_START);
1281                                 for (i = 0; i < ext->reg_count; i++)
1282                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1283                         } else {
1284                                 return;
1285                         }
1286                 }
1287         }
1288
1289         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1290         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1291                         PACKET3_SET_CONTEXT_REG_START);
1292         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1293         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1294
1295         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1296         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1297
1298         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1299         buffer[count++] = cpu_to_le32(0);
1300 }
1301
1302 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1303 {
1304         const __le32 *fw_data;
1305         volatile u32 *dst_ptr;
1306         int me, i, max_me = 4;
1307         u32 bo_offset = 0;
1308         u32 table_offset, table_size;
1309
1310         if (adev->asic_type == CHIP_CARRIZO)
1311                 max_me = 5;
1312
1313         /* write the cp table buffer */
1314         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1315         for (me = 0; me < max_me; me++) {
1316                 if (me == 0) {
1317                         const struct gfx_firmware_header_v1_0 *hdr =
1318                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1319                         fw_data = (const __le32 *)
1320                                 (adev->gfx.ce_fw->data +
1321                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1322                         table_offset = le32_to_cpu(hdr->jt_offset);
1323                         table_size = le32_to_cpu(hdr->jt_size);
1324                 } else if (me == 1) {
1325                         const struct gfx_firmware_header_v1_0 *hdr =
1326                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1327                         fw_data = (const __le32 *)
1328                                 (adev->gfx.pfp_fw->data +
1329                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1330                         table_offset = le32_to_cpu(hdr->jt_offset);
1331                         table_size = le32_to_cpu(hdr->jt_size);
1332                 } else if (me == 2) {
1333                         const struct gfx_firmware_header_v1_0 *hdr =
1334                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1335                         fw_data = (const __le32 *)
1336                                 (adev->gfx.me_fw->data +
1337                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1338                         table_offset = le32_to_cpu(hdr->jt_offset);
1339                         table_size = le32_to_cpu(hdr->jt_size);
1340                 } else if (me == 3) {
1341                         const struct gfx_firmware_header_v1_0 *hdr =
1342                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1343                         fw_data = (const __le32 *)
1344                                 (adev->gfx.mec_fw->data +
1345                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1346                         table_offset = le32_to_cpu(hdr->jt_offset);
1347                         table_size = le32_to_cpu(hdr->jt_size);
1348                 } else  if (me == 4) {
1349                         const struct gfx_firmware_header_v1_0 *hdr =
1350                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1351                         fw_data = (const __le32 *)
1352                                 (adev->gfx.mec2_fw->data +
1353                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1354                         table_offset = le32_to_cpu(hdr->jt_offset);
1355                         table_size = le32_to_cpu(hdr->jt_size);
1356                 }
1357
1358                 for (i = 0; i < table_size; i ++) {
1359                         dst_ptr[bo_offset + i] =
1360                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1361                 }
1362
1363                 bo_offset += table_size;
1364         }
1365 }
1366
/* Tear down the RLC buffer objects created by gfx_v8_0_rlc_init(). */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
        amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
1372
/*
 * gfx_v8_0_rlc_init - allocate and populate the RLC buffer objects
 * @adev: amdgpu device pointer
 *
 * Builds the clear-state buffer from vi_cs_data, and on Carrizo/Stoney
 * additionally allocates the CP table buffer (jump tables + GDS backing)
 * and fills it via cz_init_cp_jump_table().
 *
 * Returns 0 on success or a negative error code on failure.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
        volatile u32 *dst_ptr;
        u32 dws;
        const struct cs_section_def *cs_data;
        int r;

        adev->gfx.rlc.cs_data = vi_cs_data;

        cs_data = adev->gfx.rlc.cs_data;

        if (cs_data) {
                /* clear state block */
                adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

                /* Create a kernel-mapped VRAM BO sized for the CSB dwords. */
                r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
                                              AMDGPU_GEM_DOMAIN_VRAM,
                                              &adev->gfx.rlc.clear_state_obj,
                                              &adev->gfx.rlc.clear_state_gpu_addr,
                                              (void **)&adev->gfx.rlc.cs_ptr);
                if (r) {
                        dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
                        gfx_v8_0_rlc_fini(adev);
                        return r;
                }

                /* set up the cs buffer */
                dst_ptr = adev->gfx.rlc.cs_ptr;
                gfx_v8_0_get_csb_buffer(adev, dst_ptr);
                amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
                amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
        }

        if ((adev->asic_type == CHIP_CARRIZO) ||
            (adev->asic_type == CHIP_STONEY)) {
                adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
                r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
                                              PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
                                              &adev->gfx.rlc.cp_table_obj,
                                              &adev->gfx.rlc.cp_table_gpu_addr,
                                              (void **)&adev->gfx.rlc.cp_table_ptr);
                if (r) {
                        /* NOTE(review): the clear-state BO created above is not
                         * freed on this path — presumably the caller's error
                         * handling runs gfx_v8_0_rlc_fini(); confirm. */
                        dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
                        return r;
                }

                cz_init_cp_jump_table(adev);

                amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
                amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
        }

        return 0;
}
1427
/* Free the MEC HPD EOP buffer object allocated by gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1432
1433 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1434 {
1435         int r;
1436         u32 *hpd;
1437         size_t mec_hpd_size;
1438
1439         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1440
1441         /* take ownership of the relevant compute queues */
1442         amdgpu_gfx_compute_queue_acquire(adev);
1443
1444         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1445
1446         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1447                                       AMDGPU_GEM_DOMAIN_GTT,
1448                                       &adev->gfx.mec.hpd_eop_obj,
1449                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1450                                       (void **)&hpd);
1451         if (r) {
1452                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1453                 return r;
1454         }
1455
1456         memset(hpd, 0, mec_hpd_size);
1457
1458         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1459         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1460
1461         return 0;
1462 }
1463
/*
 * Raw GCN3 compute shader binary loaded into an IB and dispatched by
 * gfx_v8_0_do_edc_gpr_workarounds() together with vgpr_init_regs.
 * Presumably it touches every VGPR so the EDC logic sees initialized
 * registers — TODO confirm encoding against the GCN3 ISA manual.
 */
static const u32 vgpr_init_compute_shader[] =
{
        0x7e000209, 0x7e020208,
        0x7e040207, 0x7e060206,
        0x7e080205, 0x7e0a0204,
        0x7e0c0203, 0x7e0e0202,
        0x7e100201, 0x7e120200,
        0x7e140209, 0x7e160208,
        0x7e180207, 0x7e1a0206,
        0x7e1c0205, 0x7e1e0204,
        0x7e200203, 0x7e220202,
        0x7e240201, 0x7e260200,
        0x7e280209, 0x7e2a0208,
        0x7e2c0207, 0x7e2e0206,
        0x7e300205, 0x7e320204,
        0x7e340203, 0x7e360202,
        0x7e380201, 0x7e3a0200,
        0x7e3c0209, 0x7e3e0208,
        0x7e400207, 0x7e420206,
        0x7e440205, 0x7e460204,
        0x7e480203, 0x7e4a0202,
        0x7e4c0201, 0x7e4e0200,
        0x7e500209, 0x7e520208,
        0x7e540207, 0x7e560206,
        0x7e580205, 0x7e5a0204,
        0x7e5c0203, 0x7e5e0202,
        0x7e600201, 0x7e620200,
        0x7e640209, 0x7e660208,
        0x7e680207, 0x7e6a0206,
        0x7e6c0205, 0x7e6e0204,
        0x7e700203, 0x7e720202,
        0x7e740201, 0x7e760200,
        0x7e780209, 0x7e7a0208,
        0x7e7c0207, 0x7e7e0206,
        0xbf8a0000, 0xbf810000, /* looks like s_barrier + s_endpgm — confirm */
};
1500
/*
 * Raw GCN3 compute shader binary loaded into an IB and dispatched twice
 * by gfx_v8_0_do_edc_gpr_workarounds() (with sgpr1_init_regs and then
 * sgpr2_init_regs). Presumably it touches the SGPR file for the EDC
 * workaround — TODO confirm encoding against the GCN3 ISA manual.
 */
static const u32 sgpr_init_compute_shader[] =
{
        0xbe8a0100, 0xbe8c0102,
        0xbe8e0104, 0xbe900106,
        0xbe920108, 0xbe940100,
        0xbe960102, 0xbe980104,
        0xbe9a0106, 0xbe9c0108,
        0xbe9e0100, 0xbea00102,
        0xbea20104, 0xbea40106,
        0xbea60108, 0xbea80100,
        0xbeaa0102, 0xbeac0104,
        0xbeae0106, 0xbeb00108,
        0xbeb20100, 0xbeb40102,
        0xbeb60104, 0xbeb80106,
        0xbeba0108, 0xbebc0100,
        0xbebe0102, 0xbec00104,
        0xbec20106, 0xbec40108,
        0xbec60100, 0xbec80102,
        0xbee60004, 0xbee70005,
        0xbeea0006, 0xbeeb0007,
        0xbee80008, 0xbee90009,
        0xbefc0000, 0xbf8a0000,
        0xbf810000, 0x00000000,
};
1525
/*
 * Register/value pairs (consumed two entries at a time by
 * gfx_v8_0_do_edc_gpr_workarounds()) that program the compute dispatch
 * state for the VGPR-init shader.
 */
static const u32 vgpr_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*4,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1546
/*
 * Register/value pairs for the first SGPR-init dispatch; note the
 * static thread management mask (0x0f) differs from sgpr2_init_regs
 * (0xf0), presumably to cover different CUs — confirm against docs.
 */
static const u32 sgpr1_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1567
/*
 * Register/value pairs for the second SGPR-init dispatch; identical to
 * sgpr1_init_regs except for the static thread management mask (0xf0
 * instead of 0x0f).
 */
static const u32 sgpr2_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1588
/*
 * EDC (error detection and correction) counter registers read back at
 * the end of gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
        mmCPC_EDC_ATC_CNT,
        mmCPC_EDC_SCRATCH_CNT,
        mmCPC_EDC_UCODE_CNT,
        mmCPF_EDC_ATC_CNT,
        mmCPF_EDC_ROQ_CNT,
        mmCPF_EDC_TAG_CNT,
        mmCPG_EDC_ATC_CNT,
        mmCPG_EDC_DMA_CNT,
        mmCPG_EDC_TAG_CNT,
        mmDC_EDC_CSINVOC_CNT,
        mmDC_EDC_RESTORE_CNT,
        mmDC_EDC_STATE_CNT,
        mmGDS_EDC_CNT,
        mmGDS_EDC_GRBM_CNT,
        mmGDS_EDC_OA_DED,
        mmSPI_EDC_CNT,
        mmSQC_ATC_EDC_GATCL1_CNT,
        mmSQC_EDC_CNT,
        mmSQ_EDC_DED_CNT,
        mmSQ_EDC_INFO,
        mmSQ_EDC_SEC_CNT,
        mmTCC_EDC_CNT,
        mmTCP_ATC_EDC_GATCL1_CNT,
        mmTCP_EDC_CNT,
        mmTD_EDC_CNT
};
1617
1618 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1619 {
1620         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1621         struct amdgpu_ib ib;
1622         struct dma_fence *f = NULL;
1623         int r, i;
1624         u32 tmp;
1625         unsigned total_size, vgpr_offset, sgpr_offset;
1626         u64 gpu_addr;
1627
1628         /* only supported on CZ */
1629         if (adev->asic_type != CHIP_CARRIZO)
1630                 return 0;
1631
1632         /* bail if the compute ring is not ready */
1633         if (!ring->ready)
1634                 return 0;
1635
1636         tmp = RREG32(mmGB_EDC_MODE);
1637         WREG32(mmGB_EDC_MODE, 0);
1638
1639         total_size =
1640                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1641         total_size +=
1642                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1643         total_size +=
1644                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1645         total_size = ALIGN(total_size, 256);
1646         vgpr_offset = total_size;
1647         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1648         sgpr_offset = total_size;
1649         total_size += sizeof(sgpr_init_compute_shader);
1650
1651         /* allocate an indirect buffer to put the commands in */
1652         memset(&ib, 0, sizeof(ib));
1653         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1654         if (r) {
1655                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1656                 return r;
1657         }
1658
1659         /* load the compute shaders */
1660         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1661                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1662
1663         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1664                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1665
1666         /* init the ib length to 0 */
1667         ib.length_dw = 0;
1668
1669         /* VGPR */
1670         /* write the register state for the compute dispatch */
1671         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1672                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1673                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1674                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1675         }
1676         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1677         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1678         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1679         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1680         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1681         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1682
1683         /* write dispatch packet */
1684         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1685         ib.ptr[ib.length_dw++] = 8; /* x */
1686         ib.ptr[ib.length_dw++] = 1; /* y */
1687         ib.ptr[ib.length_dw++] = 1; /* z */
1688         ib.ptr[ib.length_dw++] =
1689                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1690
1691         /* write CS partial flush packet */
1692         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1693         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1694
1695         /* SGPR1 */
1696         /* write the register state for the compute dispatch */
1697         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1698                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1699                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1700                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1701         }
1702         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1703         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1704         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1705         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1706         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1707         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1708
1709         /* write dispatch packet */
1710         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1711         ib.ptr[ib.length_dw++] = 8; /* x */
1712         ib.ptr[ib.length_dw++] = 1; /* y */
1713         ib.ptr[ib.length_dw++] = 1; /* z */
1714         ib.ptr[ib.length_dw++] =
1715                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1716
1717         /* write CS partial flush packet */
1718         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1719         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1720
1721         /* SGPR2 */
1722         /* write the register state for the compute dispatch */
1723         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1724                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1725                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1726                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1727         }
1728         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1729         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1730         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1731         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1732         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1733         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1734
1735         /* write dispatch packet */
1736         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1737         ib.ptr[ib.length_dw++] = 8; /* x */
1738         ib.ptr[ib.length_dw++] = 1; /* y */
1739         ib.ptr[ib.length_dw++] = 1; /* z */
1740         ib.ptr[ib.length_dw++] =
1741                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1742
1743         /* write CS partial flush packet */
1744         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1745         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1746
1747         /* shedule the ib on the ring */
1748         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1749         if (r) {
1750                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1751                 goto fail;
1752         }
1753
1754         /* wait for the GPU to finish processing the IB */
1755         r = dma_fence_wait(f, false);
1756         if (r) {
1757                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1758                 goto fail;
1759         }
1760
1761         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1762         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1763         WREG32(mmGB_EDC_MODE, tmp);
1764
1765         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1766         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1767         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1768
1769
1770         /* read back registers to clear the counters */
1771         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1772                 RREG32(sec_ded_counter_registers[i]);
1773
1774 fail:
1775         amdgpu_ib_free(adev, &ib, NULL);
1776         dma_fence_put(f);
1777
1778         return r;
1779 }
1780
/*
 * gfx_v8_0_gpu_early_init - set up the per-ASIC gfx configuration
 * @adev: amdgpu device pointer
 *
 * Fills adev->gfx.config with per-ASIC shader engine/pipe/CU limits
 * (hard-coded, except Polaris10/11/12 and VegaM which read them from
 * atombios), derives the memory row size from the memory controller
 * registers and folds it into the final GB_ADDR_CONFIG value.
 *
 * Returns 0 on success or a negative error code from
 * amdgpu_atombios_get_gfx_info().
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
        u32 gb_addr_config;
        u32 mc_shared_chmap, mc_arb_ramcfg;
        u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
        u32 tmp;
        int ret;

        /* Per-ASIC fixed configuration. */
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                adev->gfx.config.max_shader_engines = 1;
                adev->gfx.config.max_tile_pipes = 2;
                adev->gfx.config.max_cu_per_sh = 6;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_texture_channel_caches = 2;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_FIJI:
                adev->gfx.config.max_shader_engines = 4;
                adev->gfx.config.max_tile_pipes = 16;
                adev->gfx.config.max_cu_per_sh = 16;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 4;
                adev->gfx.config.max_texture_channel_caches = 16;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                /* Shader engine/CU limits come from atombios here. */
                ret = amdgpu_atombios_get_gfx_info(adev);
                if (ret)
                        return ret;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_POLARIS10:
        case CHIP_VEGAM:
                /* Shader engine/CU limits come from atombios here. */
                ret = amdgpu_atombios_get_gfx_info(adev);
                if (ret)
                        return ret;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_TONGA:
                adev->gfx.config.max_shader_engines = 4;
                adev->gfx.config.max_tile_pipes = 8;
                adev->gfx.config.max_cu_per_sh = 8;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_texture_channel_caches = 8;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_CARRIZO:
                adev->gfx.config.max_shader_engines = 1;
                adev->gfx.config.max_tile_pipes = 2;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_cu_per_sh = 8;
                adev->gfx.config.max_texture_channel_caches = 2;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_STONEY:
                adev->gfx.config.max_shader_engines = 1;
                adev->gfx.config.max_tile_pipes = 2;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 1;
                adev->gfx.config.max_cu_per_sh = 3;
                adev->gfx.config.max_texture_channel_caches = 2;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 16;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
                break;
        default:
                adev->gfx.config.max_shader_engines = 2;
                adev->gfx.config.max_tile_pipes = 4;
                adev->gfx.config.max_cu_per_sh = 2;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_texture_channel_caches = 4;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        }

        /* NOTE(review): mc_shared_chmap is read but never used below. */
        mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
        adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
        mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

        adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
        adev->gfx.config.mem_max_burst_length_bytes = 256;
        if (adev->flags & AMD_IS_APU) {
                /* Get memory bank mapping mode. */
                tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
                dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
                dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

                tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
                dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
                dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

                /* Validate settings in case only one DIMM installed. */
                if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
                        dimm00_addr_map = 0;
                if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
                        dimm01_addr_map = 0;
                if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
                        dimm10_addr_map = 0;
                if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
                        dimm11_addr_map = 0;

                /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
                /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
                if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
                        adev->gfx.config.mem_row_size_in_kb = 2;
                else
                        adev->gfx.config.mem_row_size_in_kb = 1;
        } else {
                /* Discrete parts: derive row size from NOOFCOLS, capped at 4KB. */
                tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
                adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
                if (adev->gfx.config.mem_row_size_in_kb > 4)
                        adev->gfx.config.mem_row_size_in_kb = 4;
        }

        adev->gfx.config.shader_engine_tile_size = 32;
        adev->gfx.config.num_gpus = 1;
        adev->gfx.config.multi_gpu_tile_size = 64;

        /* fix up row size */
        switch (adev->gfx.config.mem_row_size_in_kb) {
        case 1:
        default:
                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
                break;
        case 2:
                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
                break;
        case 4:
                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
                break;
        }
        adev->gfx.config.gb_addr_config = gb_addr_config;

        return 0;
}
1984
1985 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1986                                         int mec, int pipe, int queue)
1987 {
1988         int r;
1989         unsigned irq_type;
1990         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1991
1992         ring = &adev->gfx.compute_ring[ring_id];
1993
1994         /* mec0 is me1 */
1995         ring->me = mec + 1;
1996         ring->pipe = pipe;
1997         ring->queue = queue;
1998
1999         ring->ring_obj = NULL;
2000         ring->use_doorbell = true;
2001         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
2002         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2003                                 + (ring_id * GFX8_MEC_HPD_SIZE);
2004         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2005
2006         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2007                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2008                 + ring->pipe;
2009
2010         /* type-2 packets are deprecated on MEC, use type-3 instead */
2011         r = amdgpu_ring_init(adev, ring, 1024,
2012                         &adev->gfx.eop_irq, irq_type);
2013         if (r)
2014                 return r;
2015
2016
2017         return 0;
2018 }
2019
2020 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
2021
/**
 * gfx_v8_0_sw_init - software-side setup for the GFX v8 IP block
 * @handle: opaque amdgpu_device pointer (IP-block callback convention)
 *
 * Registers the interrupt sources used by the block, loads microcode,
 * allocates RLC/MEC/KIQ buffer objects, creates the gfx and compute
 * rings, and reserves the GDS/GWS/OA partitions.  Teardown of everything
 * allocated here is performed by gfx_v8_0_sw_fini() in reverse order.
 *
 * Returns 0 on success or a negative error code on the first failure.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of MECs differs per ASIC; TOPAZ/STONEY only expose one */
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* Register interrupt sources by their legacy IH source IDs. */
	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* Add CP EDC/ECC irq  */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 197,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 239,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
		return r;
	}

	/* SQ interrupt details (e.g. EDC info) are parsed off the IH path
	 * in a work item; see gfx_v8_0_sq_irq_work_func(). */
	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				/* skip queues not assigned to the kernel driver */
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	/* constant engine RAM size is fixed for gfx v8 */
	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2189
/**
 * gfx_v8_0_sw_fini - software-side teardown for the GFX v8 IP block
 * @handle: opaque amdgpu_device pointer (IP-block callback convention)
 *
 * Releases everything allocated by gfx_v8_0_sw_init() in roughly the
 * reverse order: GDS/GWS/OA BOs, gfx and compute rings, MQDs, KIQ,
 * MEC/RLC buffers and the loaded microcode.  The order matters and must
 * mirror init; do not reorder.
 *
 * Returns 0 (teardown helpers report no failures to propagate).
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/* only CARRIZO/STONEY allocate an RLC CP table in rlc_init */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2223
2224 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2225 {
2226         uint32_t *modearray, *mod2array;
2227         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2228         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2229         u32 reg_offset;
2230
2231         modearray = adev->gfx.config.tile_mode_array;
2232         mod2array = adev->gfx.config.macrotile_mode_array;
2233
2234         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2235                 modearray[reg_offset] = 0;
2236
2237         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2238                 mod2array[reg_offset] = 0;
2239
2240         switch (adev->asic_type) {
2241         case CHIP_TOPAZ:
2242                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243                                 PIPE_CONFIG(ADDR_SURF_P2) |
2244                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2245                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2246                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2247                                 PIPE_CONFIG(ADDR_SURF_P2) |
2248                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2249                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2250                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2251                                 PIPE_CONFIG(ADDR_SURF_P2) |
2252                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2253                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2254                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2255                                 PIPE_CONFIG(ADDR_SURF_P2) |
2256                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2257                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2258                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2259                                 PIPE_CONFIG(ADDR_SURF_P2) |
2260                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2261                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2262                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2263                                 PIPE_CONFIG(ADDR_SURF_P2) |
2264                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2265                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2266                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2267                                 PIPE_CONFIG(ADDR_SURF_P2) |
2268                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2269                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2270                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2271                                 PIPE_CONFIG(ADDR_SURF_P2));
2272                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2273                                 PIPE_CONFIG(ADDR_SURF_P2) |
2274                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2275                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2277                                  PIPE_CONFIG(ADDR_SURF_P2) |
2278                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2279                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2280                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2281                                  PIPE_CONFIG(ADDR_SURF_P2) |
2282                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2283                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2284                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2285                                  PIPE_CONFIG(ADDR_SURF_P2) |
2286                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2287                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2288                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2289                                  PIPE_CONFIG(ADDR_SURF_P2) |
2290                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2291                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2292                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2293                                  PIPE_CONFIG(ADDR_SURF_P2) |
2294                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2295                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2296                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2297                                  PIPE_CONFIG(ADDR_SURF_P2) |
2298                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2299                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2300                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2301                                  PIPE_CONFIG(ADDR_SURF_P2) |
2302                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2303                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2304                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2305                                  PIPE_CONFIG(ADDR_SURF_P2) |
2306                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2307                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2308                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2309                                  PIPE_CONFIG(ADDR_SURF_P2) |
2310                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2311                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2312                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2313                                  PIPE_CONFIG(ADDR_SURF_P2) |
2314                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2315                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2316                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2317                                  PIPE_CONFIG(ADDR_SURF_P2) |
2318                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2319                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2320                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2321                                  PIPE_CONFIG(ADDR_SURF_P2) |
2322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2324                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2325                                  PIPE_CONFIG(ADDR_SURF_P2) |
2326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2328                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2329                                  PIPE_CONFIG(ADDR_SURF_P2) |
2330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2332                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2333                                  PIPE_CONFIG(ADDR_SURF_P2) |
2334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2336                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337                                  PIPE_CONFIG(ADDR_SURF_P2) |
2338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2340                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2341                                  PIPE_CONFIG(ADDR_SURF_P2) |
2342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2344
2345                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2346                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2347                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2348                                 NUM_BANKS(ADDR_SURF_8_BANK));
2349                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2350                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2351                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352                                 NUM_BANKS(ADDR_SURF_8_BANK));
2353                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2354                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2355                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2356                                 NUM_BANKS(ADDR_SURF_8_BANK));
2357                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2359                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2360                                 NUM_BANKS(ADDR_SURF_8_BANK));
2361                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2363                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364                                 NUM_BANKS(ADDR_SURF_8_BANK));
2365                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2366                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2367                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2368                                 NUM_BANKS(ADDR_SURF_8_BANK));
2369                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2370                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2371                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2372                                 NUM_BANKS(ADDR_SURF_8_BANK));
2373                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2374                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2375                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2376                                 NUM_BANKS(ADDR_SURF_16_BANK));
2377                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2378                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2379                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2380                                 NUM_BANKS(ADDR_SURF_16_BANK));
2381                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2382                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2383                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2384                                  NUM_BANKS(ADDR_SURF_16_BANK));
2385                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2386                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2387                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2388                                  NUM_BANKS(ADDR_SURF_16_BANK));
2389                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2390                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2391                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2392                                  NUM_BANKS(ADDR_SURF_16_BANK));
2393                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2394                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2395                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2396                                  NUM_BANKS(ADDR_SURF_16_BANK));
2397                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2399                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400                                  NUM_BANKS(ADDR_SURF_8_BANK));
2401
2402                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2403                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2404                             reg_offset != 23)
2405                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2406
2407                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2408                         if (reg_offset != 7)
2409                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2410
2411                 break;
2412         case CHIP_FIJI:
2413         case CHIP_VEGAM:
2414                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2415                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2417                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2418                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2419                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2421                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2422                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2423                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2425                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2426                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2427                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2429                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2430                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2433                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2434                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2435                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2436                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2437                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2438                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2440                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2441                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2442                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2443                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2444                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2445                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2446                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2447                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2448                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2451                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2452                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2453                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2455                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2456                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2457                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2459                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2460                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2461                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2462                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2463                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2464                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2465                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2467                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2468                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2469                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2470                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2471                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2473                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2474                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2475                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2476                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2477                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2478                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2479                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2480                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2481                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2482                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2483                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
/*
 * Tail of the tiling-mode table for a 16-pipe (ADDR_SURF_P16_32x32_16x16)
 * ASIC; the case label and modearray entries 0-17 are above this excerpt.
 * Each modearray[n] is the packed value programmed into GB_TILE_MODE<n>
 * below: array mode, pipe configuration, micro-tile mode and sample split.
 */
2484                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2485                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2486                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2487                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2488                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2489                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2490                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2491                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2492                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2493                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2494                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2495                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2496                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2497                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2498                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2499                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2500                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2501                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2502                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2503                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
/* Entry 23 is the only thick PRT mode using the narrower P4 pipe config. */
2504                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2505                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2506                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2507                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2508                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2509                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2510                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2511                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2512                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2513                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2514                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2515                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2516                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2517                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2518                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2519                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
/* Entries 27-30: rotated micro tiling variants (thin 1D/2D/PRT). */
2520                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2521                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2522                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2523                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2524                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2526                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2527                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2528                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2529                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2530                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2531                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2532                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2533                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2534                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2535                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2536
/*
 * Macrotile table: mod2array[n] becomes GB_MACROTILE_MODE<n> (bank
 * width/height, macro-tile aspect ratio, bank count).  Index 7 is
 * deliberately never assigned; the write loop below skips it.
 * NOTE(review): indices not assigned here presumably rely on the array
 * being zero-initialized earlier in the function — confirm above this view.
 */
2537                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2539                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2540                                 NUM_BANKS(ADDR_SURF_8_BANK));
2541                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2542                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2543                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2544                                 NUM_BANKS(ADDR_SURF_8_BANK));
2545                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2547                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548                                 NUM_BANKS(ADDR_SURF_8_BANK));
2549                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2551                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2552                                 NUM_BANKS(ADDR_SURF_8_BANK));
2553                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2555                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2556                                 NUM_BANKS(ADDR_SURF_8_BANK));
2557                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2560                                 NUM_BANKS(ADDR_SURF_8_BANK));
2561                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2563                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2564                                 NUM_BANKS(ADDR_SURF_8_BANK));
2565                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2567                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2568                                 NUM_BANKS(ADDR_SURF_8_BANK));
2569                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2571                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2572                                 NUM_BANKS(ADDR_SURF_8_BANK));
2573                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2575                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2576                                  NUM_BANKS(ADDR_SURF_8_BANK));
2577                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2579                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2580                                  NUM_BANKS(ADDR_SURF_8_BANK));
2581                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2583                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2584                                  NUM_BANKS(ADDR_SURF_8_BANK));
2585                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2587                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2588                                  NUM_BANKS(ADDR_SURF_8_BANK));
2589                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2591                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2592                                  NUM_BANKS(ADDR_SURF_4_BANK));
2593
/* Flush both tables into the hardware registers. */
2594                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2595                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2596
/* NOTE(review): offset 7 is skipped (and mod2array[7] never set) —
 * presumably a reserved macrotile register; confirm against the GCN spec. */
2597                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2598                         if (reg_offset != 7)
2599                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2600
2601                 break;
/*
 * CHIP_TONGA: tiling configuration for an 8-pipe
 * (ADDR_SURF_P8_32x32_16x16) part.  modearray[n] packs the value for
 * GB_TILE_MODE<n>; mod2array[n] packs GB_MACROTILE_MODE<n>.
 */
2602         case CHIP_TONGA:
/* Entries 0-7: depth micro tiling with increasing tile-split sizes. */
2603                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2605                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2606                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2607                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2608                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2610                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2611                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2612                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2613                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2614                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2615                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2617                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2618                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2619                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2620                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2622                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2623                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2624                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2626                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2627                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2628                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2630                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2631                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2632                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2633                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2634                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
/* Entry 8: linear-aligned (non-tiled) mode. */
2635                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2636                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
/* Entries 9-12: display micro tiling. */
2637                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2638                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2639                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2640                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2641                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2644                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2645                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2646                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2647                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2648                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2649                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2650                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2652                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
/* Entries 13-17: thin micro tiling. */
2653                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2654                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2656                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2659                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2660                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2661                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2662                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2663                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2664                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2665                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2666                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2667                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2668                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2669                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2670                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2672                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
/* Entries 18-26: thick/x-thick array modes. */
2673                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2674                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2675                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2676                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2677                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2678                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2680                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2681                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2682                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2683                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2684                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2685                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2686                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2687                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2688                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2689                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2690                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2692                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2693                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2694                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2696                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2697                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2698                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2699                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2700                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2701                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2702                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2703                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2704                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2705                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2706                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2707                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2708                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
/* Entries 27-30: rotated micro tiling. */
2709                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2710                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2711                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2712                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2713                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2715                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2716                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2717                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2718                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2719                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2720                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2721                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2722                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2724                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2725
/*
 * Macrotile table (GB_MACROTILE_MODE*).  Index 7 is deliberately never
 * assigned; the write loop below skips it.
 */
2726                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2729                                 NUM_BANKS(ADDR_SURF_16_BANK));
2730                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2732                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2733                                 NUM_BANKS(ADDR_SURF_16_BANK));
2734                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2736                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2737                                 NUM_BANKS(ADDR_SURF_16_BANK));
2738                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2741                                 NUM_BANKS(ADDR_SURF_16_BANK));
2742                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2743                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2744                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2745                                 NUM_BANKS(ADDR_SURF_16_BANK));
2746                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2748                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2749                                 NUM_BANKS(ADDR_SURF_16_BANK));
2750                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2752                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2753                                 NUM_BANKS(ADDR_SURF_16_BANK));
2754                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2755                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2756                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2757                                 NUM_BANKS(ADDR_SURF_16_BANK));
2758                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2759                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2760                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2761                                 NUM_BANKS(ADDR_SURF_16_BANK));
2762                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2764                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2765                                  NUM_BANKS(ADDR_SURF_16_BANK));
2766                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2768                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2769                                  NUM_BANKS(ADDR_SURF_16_BANK));
2770                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2772                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2773                                  NUM_BANKS(ADDR_SURF_8_BANK));
2774                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2776                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2777                                  NUM_BANKS(ADDR_SURF_4_BANK));
2778                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2779                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2780                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2781                                  NUM_BANKS(ADDR_SURF_4_BANK));
2782
/* Flush both tables into the hardware registers; macrotile offset 7 is
 * skipped (NOTE(review): presumably reserved — confirm against spec). */
2783                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2784                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2785
2786                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2787                         if (reg_offset != 7)
2788                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2789
2790                 break;
/*
 * CHIP_POLARIS11 / CHIP_POLARIS12: tiling configuration for a 4-pipe
 * (ADDR_SURF_P4_16x16) part.  Same table layout as the cases above:
 * modearray[n] -> GB_TILE_MODE<n>, mod2array[n] -> GB_MACROTILE_MODE<n>.
 * NOTE: this case continues past the end of this excerpt (the remaining
 * mod2array entries and the register write loops follow below).
 */
2791         case CHIP_POLARIS11:
2792         case CHIP_POLARIS12:
/* Entries 0-7: depth micro tiling with increasing tile-split sizes. */
2793                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2794                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2796                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2797                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2800                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2801                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2802                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2803                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2804                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2805                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2806                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2808                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2809                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2810                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2812                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2813                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2814                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2816                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2817                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2818                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2819                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2820                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2821                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2822                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2824                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
/* Entry 8: linear-aligned (non-tiled) mode. */
2825                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2826                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
/* Entries 9-12: display micro tiling. */
2827                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2828                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2829                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2830                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2831                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2832                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2834                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2835                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2836                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2837                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2838                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2839                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2840                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2841                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2842                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
/* Entries 13-17: thin micro tiling. */
2843                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2844                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2846                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2848                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2850                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2852                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2854                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2856                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2858                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2859                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2860                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2862                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
/* Entries 18-26: thick/x-thick array modes. */
2863                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2864                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2866                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2867                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2868                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2869                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2870                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2871                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2872                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2873                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2874                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2875                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2876                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2877                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2878                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2879                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2880                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2881                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2882                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2883                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2884                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2885                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2886                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2887                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2888                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2889                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2890                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2891                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2892                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2893                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2894                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2895                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2896                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2898                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
/* Entries 27-30: rotated micro tiling. */
2899                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2900                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2902                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2903                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2905                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2906                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2907                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2908                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2909                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2910                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2911                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2912                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2914                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2915
/*
 * Macrotile table (GB_MACROTILE_MODE*); the remaining entries and the
 * register write loops are below this excerpt.
 */
2916                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2917                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2918                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2919                                 NUM_BANKS(ADDR_SURF_16_BANK));
2920
2921                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2922                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2923                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2924                                 NUM_BANKS(ADDR_SURF_16_BANK));
2925
2926                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2928                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929                                 NUM_BANKS(ADDR_SURF_16_BANK));
2930
2931                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2932                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2933                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2934                                 NUM_BANKS(ADDR_SURF_16_BANK));
2935
2936                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2937                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2938                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2939                                 NUM_BANKS(ADDR_SURF_16_BANK));
2940
2941                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2942                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2943                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2944                                 NUM_BANKS(ADDR_SURF_16_BANK));
2945
2946                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2947                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2948                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2949                                 NUM_BANKS(ADDR_SURF_16_BANK));
2950
/* Entries 8-9 are the only ones using bank width 2 in this case. */
2951                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2952                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2953                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2954                                 NUM_BANKS(ADDR_SURF_16_BANK));
2955
2956                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2957                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959                                 NUM_BANKS(ADDR_SURF_16_BANK));
2960
2961                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2962                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2963                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2964                                 NUM_BANKS(ADDR_SURF_16_BANK));
2965
2966                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2967                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2968                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2969                                 NUM_BANKS(ADDR_SURF_16_BANK));
2970
2971                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2972                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2973                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2974                                 NUM_BANKS(ADDR_SURF_16_BANK));
2975
2976                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2979                                 NUM_BANKS(ADDR_SURF_8_BANK));
2980
2981                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2982                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2983                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2984                                 NUM_BANKS(ADDR_SURF_4_BANK));
2985
2986                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2987                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2988
2989                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2990                         if (reg_offset != 7)
2991                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2992
2993                 break;
2994         case CHIP_POLARIS10:
2995                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2996                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2997                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2998                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2999                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3000                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3001                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3002                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3003                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3004                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3005                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3006                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3007                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3008                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3009                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3010                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3011                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3012                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3014                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3015                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3016                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3018                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3019                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3020                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3021                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3022                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3023                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3024                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3025                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3026                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3027                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3028                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3029                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3030                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3031                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3032                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3033                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3034                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3035                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3036                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3037                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3038                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3039                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3040                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3041                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3042                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3043                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3044                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3045                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3046                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3047                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3048                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3049                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3050                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3051                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3052                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3053                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3054                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3055                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3056                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3057                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3058                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3059                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3060                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3061                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3062                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3063                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3064                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3065                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3066                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3067                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3068                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3069                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3070                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3071                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3072                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3073                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3074                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3075                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3076                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3077                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3078                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3079                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3080                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3081                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3082                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3083                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3084                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3085                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3086                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3087                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3088                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3089                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3090                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3091                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3092                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3093                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3094                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3095                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3096                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3097                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3098                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3099                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3100                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3101                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3102                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3103                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3104                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3105                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3107                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3108                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3109                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3110                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3111                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3112                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3113                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3114                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3115                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3116                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3117
3118                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3119                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3120                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3121                                 NUM_BANKS(ADDR_SURF_16_BANK));
3122
3123                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3124                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3125                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3126                                 NUM_BANKS(ADDR_SURF_16_BANK));
3127
3128                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3129                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3130                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3131                                 NUM_BANKS(ADDR_SURF_16_BANK));
3132
3133                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3134                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3135                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3136                                 NUM_BANKS(ADDR_SURF_16_BANK));
3137
3138                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3139                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3140                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3141                                 NUM_BANKS(ADDR_SURF_16_BANK));
3142
3143                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3144                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3145                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3146                                 NUM_BANKS(ADDR_SURF_16_BANK));
3147
3148                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3149                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3150                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3151                                 NUM_BANKS(ADDR_SURF_16_BANK));
3152
3153                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3155                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3156                                 NUM_BANKS(ADDR_SURF_16_BANK));
3157
3158                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3159                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3160                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3161                                 NUM_BANKS(ADDR_SURF_16_BANK));
3162
3163                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3164                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3165                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3166                                 NUM_BANKS(ADDR_SURF_16_BANK));
3167
3168                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3169                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3170                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3171                                 NUM_BANKS(ADDR_SURF_16_BANK));
3172
3173                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3174                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3175                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3176                                 NUM_BANKS(ADDR_SURF_8_BANK));
3177
3178                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3179                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3180                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3181                                 NUM_BANKS(ADDR_SURF_4_BANK));
3182
3183                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3184                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3185                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3186                                 NUM_BANKS(ADDR_SURF_4_BANK));
3187
3188                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3189                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3190
3191                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3192                         if (reg_offset != 7)
3193                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3194
3195                 break;
3196         case CHIP_STONEY:
3197                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3198                                 PIPE_CONFIG(ADDR_SURF_P2) |
3199                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3200                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3201                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3202                                 PIPE_CONFIG(ADDR_SURF_P2) |
3203                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3204                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3205                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3206                                 PIPE_CONFIG(ADDR_SURF_P2) |
3207                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3208                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3209                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3210                                 PIPE_CONFIG(ADDR_SURF_P2) |
3211                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3212                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3213                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3214                                 PIPE_CONFIG(ADDR_SURF_P2) |
3215                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3216                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3217                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3218                                 PIPE_CONFIG(ADDR_SURF_P2) |
3219                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3220                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3221                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3222                                 PIPE_CONFIG(ADDR_SURF_P2) |
3223                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3224                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3225                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3226                                 PIPE_CONFIG(ADDR_SURF_P2));
3227                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3228                                 PIPE_CONFIG(ADDR_SURF_P2) |
3229                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3230                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3231                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3232                                  PIPE_CONFIG(ADDR_SURF_P2) |
3233                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3234                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3236                                  PIPE_CONFIG(ADDR_SURF_P2) |
3237                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3238                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3239                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3240                                  PIPE_CONFIG(ADDR_SURF_P2) |
3241                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3242                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3243                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3244                                  PIPE_CONFIG(ADDR_SURF_P2) |
3245                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3246                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3248                                  PIPE_CONFIG(ADDR_SURF_P2) |
3249                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3250                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3251                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3252                                  PIPE_CONFIG(ADDR_SURF_P2) |
3253                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3254                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3255                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3256                                  PIPE_CONFIG(ADDR_SURF_P2) |
3257                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3258                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3259                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3260                                  PIPE_CONFIG(ADDR_SURF_P2) |
3261                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3262                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3263                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3264                                  PIPE_CONFIG(ADDR_SURF_P2) |
3265                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3266                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3267                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3268                                  PIPE_CONFIG(ADDR_SURF_P2) |
3269                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3270                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3271                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3272                                  PIPE_CONFIG(ADDR_SURF_P2) |
3273                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3274                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3275                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3276                                  PIPE_CONFIG(ADDR_SURF_P2) |
3277                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3278                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3279                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3280                                  PIPE_CONFIG(ADDR_SURF_P2) |
3281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3283                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3284                                  PIPE_CONFIG(ADDR_SURF_P2) |
3285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3287                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3288                                  PIPE_CONFIG(ADDR_SURF_P2) |
3289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3291                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292                                  PIPE_CONFIG(ADDR_SURF_P2) |
3293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3295                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3296                                  PIPE_CONFIG(ADDR_SURF_P2) |
3297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3299
3300                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3301                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3302                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303                                 NUM_BANKS(ADDR_SURF_8_BANK));
3304                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3305                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3306                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307                                 NUM_BANKS(ADDR_SURF_8_BANK));
3308                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3309                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3310                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3311                                 NUM_BANKS(ADDR_SURF_8_BANK));
3312                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3313                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3314                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3315                                 NUM_BANKS(ADDR_SURF_8_BANK));
3316                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3318                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3319                                 NUM_BANKS(ADDR_SURF_8_BANK));
3320                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3321                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3322                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3323                                 NUM_BANKS(ADDR_SURF_8_BANK));
3324                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327                                 NUM_BANKS(ADDR_SURF_8_BANK));
3328                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3329                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3330                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3331                                 NUM_BANKS(ADDR_SURF_16_BANK));
3332                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3333                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3334                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3335                                 NUM_BANKS(ADDR_SURF_16_BANK));
3336                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3337                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3338                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3339                                  NUM_BANKS(ADDR_SURF_16_BANK));
3340                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3341                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3342                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3343                                  NUM_BANKS(ADDR_SURF_16_BANK));
3344                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3345                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3346                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3347                                  NUM_BANKS(ADDR_SURF_16_BANK));
3348                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3349                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3350                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3351                                  NUM_BANKS(ADDR_SURF_16_BANK));
3352                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3353                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3354                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3355                                  NUM_BANKS(ADDR_SURF_8_BANK));
3356
3357                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3358                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3359                             reg_offset != 23)
3360                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3361
3362                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3363                         if (reg_offset != 7)
3364                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3365
3366                 break;
3367         default:
3368                 dev_warn(adev->dev,
3369                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3370                          adev->asic_type);
3371
3372         case CHIP_CARRIZO:
3373                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3374                                 PIPE_CONFIG(ADDR_SURF_P2) |
3375                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3376                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3377                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3378                                 PIPE_CONFIG(ADDR_SURF_P2) |
3379                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3380                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3381                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3382                                 PIPE_CONFIG(ADDR_SURF_P2) |
3383                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3384                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3385                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3386                                 PIPE_CONFIG(ADDR_SURF_P2) |
3387                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3388                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3389                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3390                                 PIPE_CONFIG(ADDR_SURF_P2) |
3391                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3392                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3393                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3394                                 PIPE_CONFIG(ADDR_SURF_P2) |
3395                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3396                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3397                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3398                                 PIPE_CONFIG(ADDR_SURF_P2) |
3399                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3400                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3401                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3402                                 PIPE_CONFIG(ADDR_SURF_P2));
3403                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3404                                 PIPE_CONFIG(ADDR_SURF_P2) |
3405                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3406                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3407                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3408                                  PIPE_CONFIG(ADDR_SURF_P2) |
3409                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3410                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3411                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3412                                  PIPE_CONFIG(ADDR_SURF_P2) |
3413                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3414                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3415                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3416                                  PIPE_CONFIG(ADDR_SURF_P2) |
3417                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3418                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3419                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3420                                  PIPE_CONFIG(ADDR_SURF_P2) |
3421                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3422                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3423                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3424                                  PIPE_CONFIG(ADDR_SURF_P2) |
3425                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3426                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3427                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3428                                  PIPE_CONFIG(ADDR_SURF_P2) |
3429                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3430                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3431                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3432                                  PIPE_CONFIG(ADDR_SURF_P2) |
3433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3435                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3436                                  PIPE_CONFIG(ADDR_SURF_P2) |
3437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3439                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3440                                  PIPE_CONFIG(ADDR_SURF_P2) |
3441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3443                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3444                                  PIPE_CONFIG(ADDR_SURF_P2) |
3445                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3446                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3447                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3448                                  PIPE_CONFIG(ADDR_SURF_P2) |
3449                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3450                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3451                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3452                                  PIPE_CONFIG(ADDR_SURF_P2) |
3453                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3454                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3455                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3456                                  PIPE_CONFIG(ADDR_SURF_P2) |
3457                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3458                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3459                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3460                                  PIPE_CONFIG(ADDR_SURF_P2) |
3461                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3462                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3463                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3464                                  PIPE_CONFIG(ADDR_SURF_P2) |
3465                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3466                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3467                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3468                                  PIPE_CONFIG(ADDR_SURF_P2) |
3469                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3470                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3471                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3472                                  PIPE_CONFIG(ADDR_SURF_P2) |
3473                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3474                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3475
3476                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3477                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3478                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3479                                 NUM_BANKS(ADDR_SURF_8_BANK));
3480                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3481                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3482                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3483                                 NUM_BANKS(ADDR_SURF_8_BANK));
3484                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3485                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3486                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3487                                 NUM_BANKS(ADDR_SURF_8_BANK));
3488                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3489                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3490                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3491                                 NUM_BANKS(ADDR_SURF_8_BANK));
3492                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3493                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3494                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3495                                 NUM_BANKS(ADDR_SURF_8_BANK));
3496                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3497                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3498                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3499                                 NUM_BANKS(ADDR_SURF_8_BANK));
3500                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3501                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3502                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3503                                 NUM_BANKS(ADDR_SURF_8_BANK));
3504                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3505                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3506                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3507                                 NUM_BANKS(ADDR_SURF_16_BANK));
3508                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3509                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3510                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3511                                 NUM_BANKS(ADDR_SURF_16_BANK));
3512                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3513                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3514                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3515                                  NUM_BANKS(ADDR_SURF_16_BANK));
3516                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3517                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3518                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3519                                  NUM_BANKS(ADDR_SURF_16_BANK));
3520                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3521                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3522                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3523                                  NUM_BANKS(ADDR_SURF_16_BANK));
3524                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3525                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3526                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3527                                  NUM_BANKS(ADDR_SURF_16_BANK));
3528                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3529                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3530                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3531                                  NUM_BANKS(ADDR_SURF_8_BANK));
3532
3533                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3534                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3535                             reg_offset != 23)
3536                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3537
3538                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3539                         if (reg_offset != 7)
3540                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3541
3542                 break;
3543         }
3544 }
3545
3546 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3547                                   u32 se_num, u32 sh_num, u32 instance)
3548 {
3549         u32 data;
3550
3551         if (instance == 0xffffffff)
3552                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3553         else
3554                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3555
3556         if (se_num == 0xffffffff)
3557                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3558         else
3559                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3560
3561         if (sh_num == 0xffffffff)
3562                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3563         else
3564                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3565
3566         WREG32(mmGRBM_GFX_INDEX, data);
3567 }
3568
/* Select the target ME/pipe/queue for subsequent indexed register access;
 * thin wrapper that forwards to the common SRBM selector with VMID 0. */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q)
{
	vi_srbm_select(adev, me, pipe, q, 0);
}
3574
3575 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3576 {
3577         u32 data, mask;
3578
3579         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3580                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3581
3582         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3583
3584         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3585                                          adev->gfx.config.max_sh_per_se);
3586
3587         return (~data) & mask;
3588 }
3589
/*
 * Provide the per-ASIC default PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1
 * values (rasterizer-to-render-backend routing).  The values are ORed into
 * *rconf / *rconf1, so callers should pre-zero them unless seeding bits.
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_VEGAM:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		/* single-SE part: hardware defaults are used as-is */
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3630
/*
 * Program per-SE raster configs when some render backends are harvested.
 *
 * @raster_config / @raster_config_1: the "fully populated" config values
 *	as produced by gfx_v8_0_raster_config()
 * @rb_mask: bitmap of RBs actually present
 * @num_rb: number of RBs the unharvested config assumes
 *
 * The SE/PKR/RB map fields are patched per shader engine so that work is
 * only routed to backends present in @rb_mask; each patched value is then
 * written with GRBM_GFX_INDEX pointed at that SE.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice rb_mask into one sub-mask per shader engine (up to 4 SEs). */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If an entire SE pair has no RBs left, steer SE_PAIR_MAP away
	 * from the empty pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;	/* first SE of this SE pair */

		/* If one SE of this pair is empty, point SE_MAP at the
		 * populated one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same fix-up at packer (PKR) granularity. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* And finally at individual RB granularity: PKR0 first ... */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			/* ... then PKR1, when this SE has more than two RBs. */
			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3739
/*
 * Discover which render backends are active, program the raster config
 * registers accordingly (harvested path when RBs are missing), and cache
 * the per-SE/SH RB register values for later queries (e.g. by userspace).
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	/* Serialize all GRBM_GFX_INDEX manipulation. */
	mutex_lock(&adev->grbm_idx_mutex);
	/* Collect the active-RB bitmap across every SE/SH. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* If every RB is present, broadcast the default config; otherwise
	 * write harvest-adjusted configs per SE. */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	/* Restore broadcast mode before releasing the index mutex. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3796
3797 /**
3798  * gfx_v8_0_init_compute_vmid - gart enable
3799  *
3800  * @adev: amdgpu_device pointer
3801  *
3802  * Initialize compute vmid sh_mem registers
3803  *
3804  */
#define DEFAULT_SH_MEM_BASES    (0x6000)
/* VMIDs 8..15 are reserved for compute (KFD); 0..7 belong to graphics. */
#define FIRST_COMPUTE_VMID      (8)
#define LAST_COMPUTE_VMID       (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* Same base in both the private and shared halves of SH_MEM_BASES. */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, cache-coherent
	 * default MTYPE, private apertures translated by the ATC. */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* Program every compute VMID under the SRBM mutex. */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 disabled: base above limit. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3841
3842 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3843 {
3844         switch (adev->asic_type) {
3845         default:
3846                 adev->gfx.config.double_offchip_lds_buf = 1;
3847                 break;
3848         case CHIP_CARRIZO:
3849         case CHIP_STONEY:
3850                 adev->gfx.config.double_offchip_lds_buf = 0;
3851                 break;
3852         }
3853 }
3854
/*
 * One-time GFX hardware init: address config, tiling tables, RB setup,
 * per-VMID SH_MEM programming, compute VMID apertures, and broadcast SC
 * FIFO / SPI arbitration settings.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	/* Mirror the GB address config into the blocks that need it. */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	/* Program SH_MEM_* for every VMID; VMID 0 (kernel) differs from
	 * the others in default MTYPE and bases. */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			/* shared aperture base goes in the upper 16 bits */
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		/* APE1 disabled: base above limit. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* Equal SPI arbitration priority for all four pipe order timestamps. */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3936
/*
 * Busy-wait (up to adev->usec_timeout microseconds per unit) until the
 * RLC serdes masters report idle: first the per-CU masters on every
 * SE/SH, then the non-CU (SE/GC/TC) masters.  Logs and bails out early
 * on a per-SH timeout; the final non-CU wait times out silently.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	/* Serialize GRBM_GFX_INDEX manipulation. */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* Restore broadcast and drop the lock before
				 * returning on timeout. */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Now wait for the non-CU serdes masters as well. */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3974
3975 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3976                                                bool enable)
3977 {
3978         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3979
3980         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3981         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3982         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3983         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3984
3985         WREG32(mmCP_INT_CNTL_RING0, tmp);
3986 }
3987
/*
 * Program the clear-state indirect buffer (CSIB) GPU address and length
 * into the RLC.  The low address is aligned down to 4 bytes by masking.
 */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
3998
3999 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
4000                                 int ind_offset,
4001                                 int list_size,
4002                                 int *unique_indices,
4003                                 int *indices_count,
4004                                 int max_indices,
4005                                 int *ind_start_offsets,
4006                                 int *offset_count,
4007                                 int max_offset)
4008 {
4009         int indices;
4010         bool new_entry = true;
4011
4012         for (; ind_offset < list_size; ind_offset++) {
4013
4014                 if (new_entry) {
4015                         new_entry = false;
4016                         ind_start_offsets[*offset_count] = ind_offset;
4017                         *offset_count = *offset_count + 1;
4018                         BUG_ON(*offset_count >= max_offset);
4019                 }
4020
4021                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
4022                         new_entry = true;
4023                         continue;
4024                 }
4025
4026                 ind_offset += 2;
4027
4028                 /* look for the matching indice */
4029                 for (indices = 0;
4030                         indices < *indices_count;
4031                         indices++) {
4032                         if (unique_indices[indices] ==
4033                                 register_list_format[ind_offset])
4034                                 break;
4035                 }
4036
4037                 if (indices >= *indices_count) {
4038                         unique_indices[*indices_count] =
4039                                 register_list_format[ind_offset];
4040                         indices = *indices_count;
4041                         *indices_count = *indices_count + 1;
4042                         BUG_ON(*indices_count >= max_indices);
4043                 }
4044
4045                 register_list_format[ind_offset] = indices;
4046         }
4047 }
4048
/*
 * gfx_v8_0_init_save_restore_list - upload the RLC save/restore lists.
 *
 * Copies the firmware-provided register_list_format, preprocesses it with
 * gfx_v8_0_parse_ind_reg_list() to extract unique index registers and
 * per-entry start offsets, then programs:
 *   - the direct save/restore list into SRM ARAM,
 *   - the (rewritten) indirect list and its half-size into GPM scratch,
 *   - the entry start offsets into GPM scratch,
 *   - each unique index register into the SRM INDEX_CNTL addr/data pairs
 *     (low 18 bits to the addr reg, bits 20+ to the data reg).
 *
 * Returns 0 on success or -ENOMEM if the scratch copy cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size is in units of dword pairs (restore reg/value) */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4112
/* Turn on the RLC save/restore machine (SRM). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4117
/*
 * Program the power-gating timing parameters: CP write-pointer idle poll
 * count, the four RLC PG delays, the serdes command delay and the GRBM
 * register-save GFX idle threshold.  Values are fixed tuning constants.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4134
/* Enable/disable SMU clock slow-down while powering a block up. */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4140
/* Enable/disable SMU clock slow-down while powering a block down. */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4146
/*
 * Enable/disable CP power gating.  Note the register field is a
 * *disable* bit, hence the inverted value.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4151
4152 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4153 {
4154         if ((adev->asic_type == CHIP_CARRIZO) ||
4155             (adev->asic_type == CHIP_STONEY)) {
4156                 gfx_v8_0_init_csb(adev);
4157                 gfx_v8_0_init_save_restore_list(adev);
4158                 gfx_v8_0_enable_save_restore_machine(adev);
4159                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4160                 gfx_v8_0_init_power_gating(adev);
4161                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4162         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4163                    (adev->asic_type == CHIP_POLARIS12) ||
4164                    (adev->asic_type == CHIP_VEGAM)) {
4165                 gfx_v8_0_init_csb(adev);
4166                 gfx_v8_0_init_save_restore_list(adev);
4167                 gfx_v8_0_enable_save_restore_machine(adev);
4168                 gfx_v8_0_init_power_gating(adev);
4169         }
4170
4171 }
4172
/*
 * Stop the RLC: clear the F32 enable bit, mask the GUI idle interrupts
 * and wait for the serdes units to drain.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4180
/* Pulse the GRBM soft reset line for the RLC (50us assert/deassert). */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4189
/*
 * Start the RLC F32 core.  On dGPUs the GUI idle interrupts are unmasked
 * here; APUs (e.g. Carrizo) enable them only after the CP is initialized.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4200
4201 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4202 {
4203         const struct rlc_firmware_header_v2_0 *hdr;
4204         const __le32 *fw_data;
4205         unsigned i, fw_size;
4206
4207         if (!adev->gfx.rlc_fw)
4208                 return -EINVAL;
4209
4210         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4211         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4212
4213         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4214                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4215         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4216
4217         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4218         for (i = 0; i < fw_size; i++)
4219                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4220         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4221
4222         return 0;
4223 }
4224
/*
 * gfx_v8_0_rlc_resume - full RLC bring-up sequence.
 *
 * Stops the RLC, disables coarse/light-sleep clock gating (plus the 3D
 * CGCG bits on Polaris/VegaM), disables power gating, soft-resets the
 * RLC, re-initializes PG state and, for direct firmware loading, uploads
 * the RLC microcode before starting the RLC again.
 *
 * Returns 0 on success or the microcode load error.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12 ||
	    adev->asic_type == CHIP_VEGAM) {
		/* also clear the 3D CGCG/CGLS enable bits */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);


	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy rlc firmware loading */
		r = gfx_v8_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4264
4265 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4266 {
4267         int i;
4268         u32 tmp = RREG32(mmCP_ME_CNTL);
4269
4270         if (enable) {
4271                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4272                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4273                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4274         } else {
4275                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4276                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4277                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4278                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4279                         adev->gfx.gfx_ring[i].ready = false;
4280         }
4281         WREG32(mmCP_ME_CNTL, tmp);
4282         udelay(50);
4283 }
4284
/*
 * gfx_v8_0_cp_gfx_load_microcode - upload PFP, CE and ME firmware.
 *
 * Halts the gfx CP, then streams each ucode image into its respective
 * ucode RAM and stores the firmware version in the address register.
 * Returns -EINVAL if any of the three firmware images is missing.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* engines must be halted while their ucode RAM is written */
	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
4341
4342 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4343 {
4344         u32 count = 0;
4345         const struct cs_section_def *sect = NULL;
4346         const struct cs_extent_def *ext = NULL;
4347
4348         /* begin clear state */
4349         count += 2;
4350         /* context control state */
4351         count += 3;
4352
4353         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4354                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4355                         if (sect->id == SECT_CONTEXT)
4356                                 count += 2 + ext->reg_count;
4357                         else
4358                                 return 0;
4359                 }
4360         }
4361         /* pa_sc_raster_config/pa_sc_raster_config1 */
4362         count += 4;
4363         /* end clear state */
4364         count += 2;
4365         /* clear state */
4366         count += 2;
4367
4368         return count;
4369 }
4370
/*
 * gfx_v8_0_cp_gfx_start - initialize the CP and emit the clear state.
 *
 * Programs basic CP config registers, un-halts the gfx engines, then
 * submits the PM4 clear-state stream on gfx ring 0: preamble begin,
 * context control, all SECT_CONTEXT register extents from vi_cs_data,
 * the raster config pair, preamble end, CLEAR_STATE and the CE
 * partition bases.  The packet count must match gfx_v8_0_get_csb_size().
 *
 * Returns 0 on success or the ring-allocation error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* csb size + 4 dwords for the SET_BASE packet at the end */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/*
 * Configure the CP gfx ring doorbell: offset + enable when the ring uses
 * a doorbell, disabled otherwise.  On dGPUs the aperture range registers
 * are also programmed; Topaz has no gfx doorbells at all.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* doorbell range registers only exist/apply on dGPUs */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4467
/*
 * gfx_v8_0_cp_gfx_resume - bring up gfx ring 0.
 *
 * Programs the ring buffer size/control, resets the read/write pointers
 * (temporarily forcing RPTR write-enable), sets the writeback addresses
 * for rptr and wptr polling, programs the ring base address and the
 * doorbell, then starts the CP and ring-tests it.
 *
 * Returns the ring test result; on failure the ring stays not ready.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* drop RPTR_WR_ENA again now that the pointers are reset */
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4525
4526 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4527 {
4528         int i;
4529
4530         if (enable) {
4531                 WREG32(mmCP_MEC_CNTL, 0);
4532         } else {
4533                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4534                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4535                         adev->gfx.compute_ring[i].ready = false;
4536                 adev->gfx.kiq.ring.ready = false;
4537         }
4538         udelay(50);
4539 }
4540
/*
 * gfx_v8_0_cp_compute_load_microcode - upload MEC firmware.
 *
 * Halts the compute engines, streams the MEC1 image into its ucode RAM
 * and, if a separate MEC2 image exists, uploads that too.  Returns
 * -EINVAL when the MEC1 firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4586
/* KIQ functions */
/*
 * Tell the RLC which me/pipe/queue is the kernel interface queue (KIQ).
 * The low byte of RLC_CP_SCHEDULERS encodes the queue id; it is written
 * once without and once with bit 7 set (presumably a latch/activate bit
 * — NOTE(review): confirm against RLC documentation).
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4601
/*
 * gfx_v8_0_kiq_kcq_enable - map all compute queues through the KIQ.
 *
 * Builds the queue mask from the MEC queue bitmap, then submits a
 * SET_RESOURCES packet followed by one MAP_QUEUES packet per compute
 * ring on the KIQ ring.  Completion is detected by a trailing
 * SET_UCONFIG_REG write of 0xDEADBEEF to a scratch register, which is
 * polled up to usec_timeout.
 *
 * Returns 0 on success, a ring/scratch error, or -EINVAL on timeout.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	uint64_t queue_mask = 0;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	/* 8 dwords per MAP_QUEUES + 11 for SET_RESOURCES and completion */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
4687
/*
 * gfx_v8_0_deactivate_hqd - dequeue the currently selected HQD.
 *
 * If the HQD is active, issue a dequeue request of type @req and poll
 * for the active bit to clear (up to usec_timeout).  The dequeue request
 * and PQ read/write pointers are always cleared afterwards.  Callers
 * must have the HQD selected via srbm beforehand.
 *
 * Returns 0 on success or -ETIMEDOUT if the HQD stayed active.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4708
/*
 * gfx_v8_0_mqd_init - populate the memory queue descriptor for a compute ring
 * @ring: ring whose ring->mqd_ptr is to be initialized
 *
 * Fills the vi_mqd structure with defaults and with values derived from the
 * ring: EOP buffer address, ring buffer address/size, doorbell setup and the
 * rptr/wptr writeback addresses.  Several fields are seeded by reading the
 * currently selected HQD registers, so the call sites in this file select
 * the target queue via vi_srbm_select() under srbm_mutex first.
 *
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
        uint32_t tmp;

        mqd->header = 0xC0310800;
        mqd->compute_pipelinestat_enable = 0x00000001;
        /* enable all CUs on all shader engines by default */
        mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
        mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
        mqd->compute_misc_reserved = 0x00000003;
        /* GPU address of the dynamic CU mask stored alongside the MQD */
        mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
                                                     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
        mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
                                                     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
        /* EOP buffer base is stored in 256-byte units (address >> 8) */
        eop_base_addr = ring->eop_gpu_addr >> 8;
        mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
        mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

        /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
        tmp = RREG32(mmCP_HQD_EOP_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
                        (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

        mqd->cp_hqd_eop_control = tmp;

        /* enable doorbell? */
        tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
                            CP_HQD_PQ_DOORBELL_CONTROL,
                            DOORBELL_EN,
                            ring->use_doorbell ? 1 : 0);

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* set the pointer to the MQD */
        mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
        mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

        /* set MQD vmid to 0 */
        tmp = RREG32(mmCP_MQD_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
        mqd->cp_mqd_control = tmp;

        /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
        hqd_gpu_addr = ring->gpu_addr >> 8;
        mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
        mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

        /* set up the HQD, this is similar to CP_RB0_CNTL */
        tmp = RREG32(mmCP_HQD_PQ_CONTROL);
        /* queue size is encoded as log2(size in dwords) - 1 */
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
                            (order_base_2(ring->ring_size / 4) - 1));
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
                        ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
        mqd->cp_hqd_pq_control = tmp;

        /* set the wb address whether it's enabled or not */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
        mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_rptr_report_addr_hi =
                upper_32_bits(wb_gpu_addr) & 0xffff;

        /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
        wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
        mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

        tmp = 0;
        /* enable the doorbell if requested */
        if (ring->use_doorbell) {
                tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                DOORBELL_OFFSET, ring->doorbell_index);

                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_EN, 1);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_SOURCE, 0);
                tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                         DOORBELL_HIT, 0);
        }

        mqd->cp_hqd_pq_doorbell_control = tmp;

        /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
        ring->wptr = 0;
        mqd->cp_hqd_pq_wptr = ring->wptr;
        mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

        /* set the vmid for the queue */
        mqd->cp_hqd_vmid = 0;

        tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
        mqd->cp_hqd_persistent_state = tmp;

        /* set MTYPE */
        tmp = RREG32(mmCP_HQD_IB_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
        mqd->cp_hqd_ib_control = tmp;

        tmp = RREG32(mmCP_HQD_IQ_TIMER);
        tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
        mqd->cp_hqd_iq_timer = tmp;

        tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
        mqd->cp_hqd_ctx_save_control = tmp;

        /* defaults - seed the remaining fields from the current HQD state */
        mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
        mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
        mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
        mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
        mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
        mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
        mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
        mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
        mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
        mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
        mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
        mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
        mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
        mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
        mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

        /* activate the queue */
        mqd->cp_hqd_active = 1;

        return 0;
}
4851
/*
 * gfx_v8_0_mqd_commit - program the selected HQD registers from an MQD
 * @adev: amdgpu device pointer
 * @mqd: memory queue descriptor to copy into the hardware queue registers
 *
 * Writes the MQD fields into the currently SRBM-selected hardware queue.
 * The MQD fields from cp_mqd_base_addr_lo onwards mirror the register file
 * starting at mmCP_MQD_BASE_ADDR, so registers are programmed by indexing
 * mqd_data with (reg - mmCP_MQD_BASE_ADDR).  mmCP_HQD_ACTIVE is written
 * last so the queue only becomes active once fully programmed.
 *
 * Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
                        struct vi_mqd *mqd)
{
        uint32_t mqd_reg;
        uint32_t *mqd_data;

        /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
        mqd_data = &mqd->cp_mqd_base_addr_lo;

        /* disable wptr polling */
        WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

        /* program all HQD registers */
        for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
         * This is safe since EOP RPTR==WPTR for any inactive HQD
         * on ASICs that do not support context-save.
         * EOP writes/reads can start anywhere in the ring.
         */
        if (adev->asic_type != CHIP_TONGA) {
                WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
                WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
                WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
        }

        /* program the remaining HQD registers after the EOP pointers */
        for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* activate the HQD */
        for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        return 0;
}
4888
/*
 * gfx_v8_0_kiq_init_queue - initialize the kernel interface queue (KIQ)
 * @ring: the KIQ ring
 *
 * On GPU reset, restores the MQD from the backup copy and re-commits it to
 * the hardware; otherwise builds a fresh MQD, commits it, and saves a backup
 * for later resets.  The KIQ uses the mqd_backup slot at index
 * AMDGPU_MAX_COMPUTE_RINGS (one past the compute-ring slots).
 *
 * Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

        gfx_v8_0_kiq_setting(ring);

        if (adev->in_gpu_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);
                /* select the KIQ's me/pipe/queue before touching HQD regs */
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_commit(adev, mqd);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);
        } else {
                /* first init: build the MQD from scratch with all CUs/RBs enabled */
                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_init(ring);
                gfx_v8_0_mqd_commit(adev, mqd);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                /* keep a backup so a later GPU reset can restore a clean MQD */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
        }

        return 0;
}
4927
/*
 * gfx_v8_0_kcq_init_queue - initialize a kernel compute queue's MQD
 * @ring: the compute ring to initialize
 *
 * Unlike the KIQ path, this only prepares the MQD in memory; the queue is
 * later mapped through the KIQ (see gfx_v8_0_kiq_kcq_enable() callers),
 * so no gfx_v8_0_mqd_commit() is done here on first init.
 *
 * Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        /* backup slot index = position of this ring in the compute_ring array */
        int mqd_idx = ring - &adev->gfx.compute_ring[0];

        if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
                /* first init: build a fresh MQD with all CUs/RBs enabled */
                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_init(ring);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                /* keep a backup so a later GPU reset can restore a clean MQD */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
        } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);
        } else {
                /* resume from suspend: MQD contents are kept, just clear the ring */
                amdgpu_ring_clear_ring(ring);
        }
        return 0;
}
4958
4959 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4960 {
4961         if (adev->asic_type > CHIP_TONGA) {
4962                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4963                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4964         }
4965         /* enable doorbells */
4966         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4967 }
4968
/*
 * gfx_v8_0_kiq_resume - bring up the KIQ and all kernel compute queues
 * @adev: amdgpu device pointer
 *
 * Enables the compute CP, initializes the KIQ MQD, then the MQDs of all
 * compute rings, programs the MEC doorbell range, maps the compute queues
 * through the KIQ, and finally ring-tests the KIQ and every KCQ.
 *
 * Returns 0 on success or a negative error code.  A KCQ ring-test failure
 * only marks that ring not ready; it does not fail the whole resume.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = NULL;
        int r = 0, i;

        gfx_v8_0_cp_compute_enable(adev, true);

        ring = &adev->gfx.kiq.ring;

        /* map the KIQ MQD BO and initialize/commit its contents */
        r = amdgpu_bo_reserve(ring->mqd_obj, false);
        if (unlikely(r != 0))
                goto done;

        r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
        if (!r) {
                r = gfx_v8_0_kiq_init_queue(ring);
                amdgpu_bo_kunmap(ring->mqd_obj);
                ring->mqd_ptr = NULL;
        }
        amdgpu_bo_unreserve(ring->mqd_obj);
        if (r)
                goto done;

        /* same map/init/unmap sequence for each compute ring's MQD */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                ring = &adev->gfx.compute_ring[i];

                r = amdgpu_bo_reserve(ring->mqd_obj, false);
                if (unlikely(r != 0))
                        goto done;
                r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
                if (!r) {
                        r = gfx_v8_0_kcq_init_queue(ring);
                        amdgpu_bo_kunmap(ring->mqd_obj);
                        ring->mqd_ptr = NULL;
                }
                amdgpu_bo_unreserve(ring->mqd_obj);
                if (r)
                        goto done;
        }

        gfx_v8_0_set_mec_doorbell_range(adev);

        /* map the KCQs onto the hardware via KIQ packets */
        r = gfx_v8_0_kiq_kcq_enable(adev);
        if (r)
                goto done;

        /* Test KIQ */
        ring = &adev->gfx.kiq.ring;
        ring->ready = true;
        r = amdgpu_ring_test_ring(ring);
        if (r) {
                ring->ready = false;
                goto done;
        }

        /* Test KCQs */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                ring = &adev->gfx.compute_ring[i];
                ring->ready = true;
                r = amdgpu_ring_test_ring(ring);
                if (r)
                        ring->ready = false;
        }

done:
        return r;
}
5036
/*
 * gfx_v8_0_cp_resume - resume the graphics and compute command processors
 * @adev: amdgpu device pointer
 *
 * Optionally loads the CP microcode (legacy direct loading only), then
 * resumes the GFX ring followed by the KIQ/compute rings.  The GUI idle
 * interrupt is disabled around the sequence on dGPUs and re-enabled at
 * the end.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
        int r;

        if (!(adev->flags & AMD_IS_APU))
                gfx_v8_0_enable_gui_idle_interrupt(adev, false);

        if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
                /* legacy firmware loading */
                r = gfx_v8_0_cp_gfx_load_microcode(adev);
                if (r)
                        return r;

                r = gfx_v8_0_cp_compute_load_microcode(adev);
                if (r)
                        return r;
        }

        r = gfx_v8_0_cp_gfx_resume(adev);
        if (r)
                return r;

        r = gfx_v8_0_kiq_resume(adev);
        if (r)
                return r;

        gfx_v8_0_enable_gui_idle_interrupt(adev, true);

        return 0;
}
5067
5068 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5069 {
5070         gfx_v8_0_cp_gfx_enable(adev, enable);
5071         gfx_v8_0_cp_compute_enable(adev, enable);
5072 }
5073
/*
 * gfx_v8_0_hw_init - hw init callback for the GFX IP block
 * @handle: amdgpu device pointer (as void *)
 *
 * Applies the golden register settings, initializes the GPU, resumes the
 * RLC and finally the command processors.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        gfx_v8_0_init_golden_registers(adev);
        gfx_v8_0_gpu_init(adev);

        r = gfx_v8_0_rlc_resume(adev);
        if (r)
                return r;

        return gfx_v8_0_cp_resume(adev);
}
5090
/*
 * gfx_v8_0_kcq_disable - unmap a compute queue through the KIQ
 * @kiq_ring: the kernel interface queue used to submit the request
 * @ring: the compute ring to unmap
 *
 * Emits a PACKET3_UNMAP_QUEUES (RESET_QUEUES action) on the KIQ for the
 * given ring's doorbell, followed by a scratch-register write that serves
 * as a completion fence, then polls the scratch register.
 *
 * Returns 0 on success, a negative error code on allocation failure or
 * -EINVAL if the completion fence is not observed within usec_timeout.
 */
static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = kiq_ring->adev;
        uint32_t scratch, tmp = 0;
        int r, i;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("Failed to get scratch reg (%d).\n", r);
                return r;
        }
        /* seed the scratch reg so we can detect the fence write */
        WREG32(scratch, 0xCAFEDEAD);

        /* 6 dwords for UNMAP_QUEUES + 3 dwords for the fence write */
        r = amdgpu_ring_alloc(kiq_ring, 10);
        if (r) {
                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
                amdgpu_gfx_scratch_free(adev, scratch);
                return r;
        }

        /* unmap queues */
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
        amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
                                                PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
                                                PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
                                                PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
                                                PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
        amdgpu_ring_write(kiq_ring, 0);
        amdgpu_ring_write(kiq_ring, 0);
        amdgpu_ring_write(kiq_ring, 0);
        /* write to scratch for completion */
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
        amdgpu_ring_commit(kiq_ring);

        /* wait for the CP to execute the fence write */
        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i >= adev->usec_timeout) {
                DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
                r = -EINVAL;
        }
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}
5141
/*
 * gfx_v8_0_hw_fini - hw fini callback for the GFX IP block
 * @handle: amdgpu device pointer (as void *)
 *
 * Releases the GFX interrupt sources, unmaps all compute queues via the
 * KIQ, and (on bare metal only) stops the command processors and RLC and
 * ungates GFX powergating.  Under SR-IOV the host owns the engine state,
 * so teardown stops after the KCQ disable.
 *
 * Returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i;

        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

        amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

        amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

        /* disable KCQ to avoid CPC touch memory not valid anymore */
        for (i = 0; i < adev->gfx.num_compute_rings; i++)
                gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);

        if (amdgpu_sriov_vf(adev)) {
                pr_debug("For SRIOV client, shouldn't do anything.\n");
                return 0;
        }
        gfx_v8_0_cp_enable(adev, false);
        gfx_v8_0_rlc_stop(adev);

        amdgpu_device_ip_set_powergating_state(adev,
                                               AMD_IP_BLOCK_TYPE_GFX,
                                               AMD_PG_STATE_UNGATE);

        return 0;
}
5171
5172 static int gfx_v8_0_suspend(void *handle)
5173 {
5174         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5175         adev->gfx.in_suspend = true;
5176         return gfx_v8_0_hw_fini(adev);
5177 }
5178
5179 static int gfx_v8_0_resume(void *handle)
5180 {
5181         int r;
5182         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5183
5184         r = gfx_v8_0_hw_init(adev);
5185         adev->gfx.in_suspend = false;
5186         return r;
5187 }
5188
5189 static bool gfx_v8_0_is_idle(void *handle)
5190 {
5191         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5192
5193         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5194                 return false;
5195         else
5196                 return true;
5197 }
5198
5199 static int gfx_v8_0_wait_for_idle(void *handle)
5200 {
5201         unsigned i;
5202         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5203
5204         for (i = 0; i < adev->usec_timeout; i++) {
5205                 if (gfx_v8_0_is_idle(handle))
5206                         return 0;
5207
5208                 udelay(1);
5209         }
5210         return -ETIMEDOUT;
5211 }
5212
/*
 * gfx_v8_0_check_soft_reset - determine whether a GFX soft reset is needed
 * @handle: amdgpu device pointer (as void *)
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS for busy/pending
 * conditions and accumulates the corresponding GRBM/SRBM soft-reset field
 * bits.  The computed masks are cached in adev->gfx.{grbm,srbm}_soft_reset
 * for the pre/soft/post reset callbacks to consume.
 *
 * Returns true if any reset bits were set, false otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        /* GRBM_STATUS: any busy graphics pipeline block forces a CP+GFX reset */
        tmp = RREG32(mmGRBM_STATUS);
        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
                   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        }

        /* GRBM_STATUS2: RLC and CP front/compute/graphics engine state */
        tmp = RREG32(mmGRBM_STATUS2);
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

        if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPF, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPC, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPG, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
                                                SOFT_RESET_GRBM, 1);
        }

        /* SRBM_STATUS: pending GRBM requests or a busy semaphore block */
        tmp = RREG32(mmSRBM_STATUS);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

        /* cache the masks for the pre/soft/post reset handlers */
        if (grbm_soft_reset || srbm_soft_reset) {
                adev->gfx.grbm_soft_reset = grbm_soft_reset;
                adev->gfx.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->gfx.grbm_soft_reset = 0;
                adev->gfx.srbm_soft_reset = 0;
                return false;
        }
}
5274
5275 static int gfx_v8_0_pre_soft_reset(void *handle)
5276 {
5277         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5278         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5279
5280         if ((!adev->gfx.grbm_soft_reset) &&
5281             (!adev->gfx.srbm_soft_reset))
5282                 return 0;
5283
5284         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5285         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5286
5287         /* stop the rlc */
5288         gfx_v8_0_rlc_stop(adev);
5289
5290         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5291             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5292                 /* Disable GFX parsing/prefetching */
5293                 gfx_v8_0_cp_gfx_enable(adev, false);
5294
5295         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5296             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5297             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5298             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5299                 int i;
5300
5301                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5302                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5303
5304                         mutex_lock(&adev->srbm_mutex);
5305                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5306                         gfx_v8_0_deactivate_hqd(adev, 2);
5307                         vi_srbm_select(adev, 0, 0, 0, 0);
5308                         mutex_unlock(&adev->srbm_mutex);
5309                 }
5310                 /* Disable MEC parsing/prefetching */
5311                 gfx_v8_0_cp_compute_enable(adev, false);
5312         }
5313
5314        return 0;
5315 }
5316
/*
 * gfx_v8_0_soft_reset - pulse the GRBM/SRBM soft reset bits
 * @handle: amdgpu device pointer (as void *)
 *
 * Using the masks cached by gfx_v8_0_check_soft_reset(): stalls the GFX
 * memory controller interface, asserts then deasserts the GRBM and SRBM
 * soft-reset bits (each with a 50us hold and a read-back to post the
 * write), and finally releases the memory controller stall.
 *
 * Returns 0 (also when no reset was pending).
 */
static int gfx_v8_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        /* stall GFX traffic in the memory controller before resetting */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
                WREG32(mmGMCON_DEBUG, tmp);
                udelay(50);
        }

        if (grbm_soft_reset) {
                /* assert the GRBM reset bits, hold, then deassert */
                tmp = RREG32(mmGRBM_SOFT_RESET);
                tmp |= grbm_soft_reset;
                dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~grbm_soft_reset;
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);
        }

        if (srbm_soft_reset) {
                /* same assert/hold/deassert sequence for the SRBM */
                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);
        }

        /* release the memory controller stall */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
                WREG32(mmGMCON_DEBUG, tmp);
        }

        /* Wait a little for things to settle down */
        udelay(50);

        return 0;
}
5378
/*
 * gfx_v8_0_post_soft_reset - bring the GFX block back up after a soft reset
 * @handle: amdgpu device pointer (as void *)
 *
 * Mirrors gfx_v8_0_pre_soft_reset(): resumes the GFX CP if it was reset,
 * deactivates any stale compute HQD state and resumes the KIQ/compute
 * queues if the compute engines were reset, then restarts the RLC.
 *
 * Returns 0.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

        /* nothing to do if check_soft_reset found no pending resets */
        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
                gfx_v8_0_cp_gfx_resume(adev);

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
                int i;

                /* clear any HQD left active across the reset before resuming */
                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                        mutex_lock(&adev->srbm_mutex);
                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                        gfx_v8_0_deactivate_hqd(adev, 2);
                        vi_srbm_select(adev, 0, 0, 0, 0);
                        mutex_unlock(&adev->srbm_mutex);
                }
                gfx_v8_0_kiq_resume(adev);
        }
        gfx_v8_0_rlc_start(adev);

        return 0;
}
5416
5417 /**
5418  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5419  *
5420  * @adev: amdgpu_device pointer
5421  *
5422  * Fetches a GPU clock counter snapshot.
5423  * Returns the 64 bit clock counter snapshot.
5424  */
5425 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5426 {
5427         uint64_t clock;
5428
5429         mutex_lock(&adev->gfx.gpu_clock_mutex);
5430         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5431         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5432                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5433         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5434         return clock;
5435 }
5436
/*
 * gfx_v8_0_ring_emit_gds_switch - emit packets updating a VMID's GDS config
 * @ring: ring to emit on
 * @vmid: VM ID whose GDS/GWS/OA registers are targeted
 * @gds_base/@gds_size: GDS partition base and size (byte units, shifted
 *                      down by AMDGPU_GDS_SHIFT before writing)
 * @gws_base/@gws_size: GWS partition base and size
 * @oa_base/@oa_size: OA partition base and size
 *
 * Emits four WRITE_DATA packets programming the per-VMID GDS base/size,
 * GWS, and OA registers via amdgpu_gds_reg_offset[vmid].
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
                                          uint32_t vmid,
                                          uint32_t gds_base, uint32_t gds_size,
                                          uint32_t gws_base, uint32_t gws_size,
                                          uint32_t oa_base, uint32_t oa_size)
{
        /* convert byte quantities into the units the registers expect */
        gds_base = gds_base >> AMDGPU_GDS_SHIFT;
        gds_size = gds_size >> AMDGPU_GDS_SHIFT;

        gws_base = gws_base >> AMDGPU_GWS_SHIFT;
        gws_size = gws_size >> AMDGPU_GWS_SHIFT;

        oa_base = oa_base >> AMDGPU_OA_SHIFT;
        oa_size = oa_size >> AMDGPU_OA_SHIFT;

        /* GDS Base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_base);

        /* GDS Size */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_size);

        /* GWS */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

        /* OA */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
        amdgpu_ring_write(ring, 0);
        /* contiguous mask of oa_size bits starting at bit oa_base */
        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5484
5485 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5486 {
5487         WREG32(mmSQ_IND_INDEX,
5488                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5489                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5490                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5491                 (SQ_IND_INDEX__FORCE_READ_MASK));
5492         return RREG32(mmSQ_IND_DATA);
5493 }
5494
5495 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5496                            uint32_t wave, uint32_t thread,
5497                            uint32_t regno, uint32_t num, uint32_t *out)
5498 {
5499         WREG32(mmSQ_IND_INDEX,
5500                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5501                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5502                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5503                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5504                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5505                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5506         while (num--)
5507                 *(out++) = RREG32(mmSQ_IND_DATA);
5508 }
5509
5510 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5511 {
5512         /* type 0 wave data */
5513         dst[(*no_fields)++] = 0;
5514         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5515         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5516         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5517         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5518         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5519         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5520         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5521         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5522         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5523         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5524         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5525         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5526         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5527         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5528         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5529         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5530         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5531         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5532 }
5533
5534 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5535                                      uint32_t wave, uint32_t start,
5536                                      uint32_t size, uint32_t *dst)
5537 {
5538         wave_read_regs(
5539                 adev, simd, wave, 0,
5540                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5541 }
5542
5543
/* GFX callbacks exported to the amdgpu core (clock counter reads, SE/SH
 * selection, debugfs wave-state dumps, ME/pipe/queue selection).
 */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5551
/* IP early-init hook: record ring counts and install the gfx/ring/irq/
 * gds/rlc function tables.  No hardware is touched yet.  Returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5566
/* IP late-init hook: enable the GFX interrupt sources, run the EDC GPR
 * workaround (needs the IB pool, hence late init), then allow the GFX
 * block to power-gate.  Returns 0 on success or the first error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	/* privileged register access fault interrupt */
	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	/* privileged instruction fault interrupt */
	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	/* CP EDC/ECC error interrupt */
	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
	if (r) {
		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
		return r;
	}

	/* SQ interrupt source */
	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
	if (r) {
		DRM_ERROR(
			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
			r);
		return r;
	}

	amdgpu_device_ip_set_powergating_state(adev,
					       AMD_IP_BLOCK_TYPE_GFX,
					       AMD_PG_STATE_GATE);

	return 0;
}
5605
5606 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5607                                                        bool enable)
5608 {
5609         if ((adev->asic_type == CHIP_POLARIS11) ||
5610             (adev->asic_type == CHIP_POLARIS12) ||
5611             (adev->asic_type == CHIP_VEGAM))
5612                 /* Send msg to SMU via Powerplay */
5613                 amdgpu_device_ip_set_powergating_state(adev,
5614                                                        AMD_IP_BLOCK_TYPE_SMC,
5615                                                        enable ?
5616                                                        AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5617
5618         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5619 }
5620
5621 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5622                                                         bool enable)
5623 {
5624         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5625 }
5626
5627 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5628                 bool enable)
5629 {
5630         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5631 }
5632
5633 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5634                                           bool enable)
5635 {
5636         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5637 }
5638
5639 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5640                                                 bool enable)
5641 {
5642         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5643
5644         /* Read any GFX register to wake up GFX. */
5645         if (!enable)
5646                 RREG32(mmDB_RENDER_CONTROL);
5647 }
5648
5649 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5650                                           bool enable)
5651 {
5652         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5653                 cz_enable_gfx_cg_power_gating(adev, true);
5654                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5655                         cz_enable_gfx_pipeline_power_gating(adev, true);
5656         } else {
5657                 cz_enable_gfx_cg_power_gating(adev, false);
5658                 cz_enable_gfx_pipeline_power_gating(adev, false);
5659         }
5660 }
5661
/* IP powergating hook.
 *
 * @handle: amdgpu_device pointer (as void *)
 * @state:  AMD_PG_STATE_GATE to request gating, otherwise ungate
 *
 * Each PG feature is turned on only when gating was requested AND the
 * matching pg_flags capability bit is set; otherwise it is explicitly
 * turned off.  SR-IOV VFs do not own PG, so this is a no-op for them.
 * Always returns 0.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down on power up/down follows RLC_SMU_HS support */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		/* static medium-grain PG */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		/* dynamic medium-grain PG */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		/* other ASICs: no GFX PG control here */
		break;
	}

	return 0;
}
5723
/* Report which clock-gating features are currently active by inspecting
 * the live CG control registers, ORing AMD_CG_SUPPORT_* bits into @flags.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* NOTE(review): under SR-IOV the flags are cleared but execution
	 * still falls through to the register reads below -- confirm the
	 * registers are accessible/meaningful from a VF.
	 */
	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS (RLC memory light sleep implies MGLS) */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS (CP memory light sleep implies MGLS) */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5765
/* Broadcast a BPM command over the RLC serdes bus.
 *
 * @reg_addr: BPM register to target (e.g. BPM_REG_MGCG_OVERRIDE)
 * @cmd:      serdes command (SET_BPM_SERDES_CMD / CLE_BPM_SERDES_CMD)
 *
 * Targets all SEs/SHs and all CU and non-CU masters, then programs
 * RLC_SERDES_WR_CTRL with the command, register address and the 0xff
 * broadcast BPM address.  Stoney uses a narrower clear mask that leaves
 * BPM_DATA/REG_ADDR untouched -- presumably a HW difference; confirm
 * against the Stoney register spec.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* command + target register, broadcast to every BPM */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5806
/* RLC safe-mode handshake messages plus the RLC_GPR_REG2 REQ/MESSAGE
 * field layout used by the safe-mode request path.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5813
/* Request RLC safe mode before touching clock-gating registers.
 * No-op when the RLC F32 core is not running or when neither CGCG nor
 * MGCG is supported.  Waits (polling, 1us steps) first for GFX clocks
 * and power to report active, then for the RLC to ack the command.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* NOTE(review): 'data' still holds the RLC_CNTL readback;
		 * only CMD/MESSAGE are set explicitly before writing
		 * RLC_SAFE_MODE -- presumably the other bits are don't-care
		 * in that register; confirm against the register spec.
		 */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clocks and power to be up */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to clear the CMD bit (ack) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5847
/* Release RLC safe mode (counterpart of iceland_enter_rlc_safe_mode).
 * No-op when the RLC F32 core is not running.  A cleared MESSAGE field
 * plus CMD requests the exit; the final poll waits for the RLC ack.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* NOTE(review): as in the enter path, 'data' carries
			 * the RLC_CNTL readback into the RLC_SAFE_MODE write.
			 */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to clear the CMD bit (ack) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5872
/* RLC safe-mode enter/exit callbacks used by the CG/PG update paths. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5877
/* Enable or disable medium-grain clock gating (MGCG), the related memory
 * light-sleep features (RLC/CP MGLS) and CGTS tree-shade gating.  The
 * whole sequence runs inside RLC safe mode; the numbered steps follow the
 * hardware-required programming order, so do not reorder them.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override set; dGPUs clear it as well */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5981
/* Enable or disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS).  Runs inside RLC safe mode; the serdes waits and
 * BPM commands follow the hardware-required order -- do not reorder.
 * GUI-idle interrupts are re-enabled at the end of both paths (needed
 * for power gating even when CG is off).
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - drop the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6074 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6075                                             bool enable)
6076 {
6077         if (enable) {
6078                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6079                  * ===  MGCG + MGLS + TS(CG/LS) ===
6080                  */
6081                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6082                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6083         } else {
6084                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6085                  * ===  CGCG + CGLS ===
6086                  */
6087                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6088                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6089         }
6090         return 0;
6091 }
6092
6093 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6094                                           enum amd_clockgating_state state)
6095 {
6096         uint32_t msg_id, pp_state = 0;
6097         uint32_t pp_support_state = 0;
6098
6099         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6100                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6101                         pp_support_state = PP_STATE_SUPPORT_LS;
6102                         pp_state = PP_STATE_LS;
6103                 }
6104                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6105                         pp_support_state |= PP_STATE_SUPPORT_CG;
6106                         pp_state |= PP_STATE_CG;
6107                 }
6108                 if (state == AMD_CG_STATE_UNGATE)
6109                         pp_state = 0;
6110
6111                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6112                                 PP_BLOCK_GFX_CG,
6113                                 pp_support_state,
6114                                 pp_state);
6115                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6116                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6117         }
6118
6119         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6120                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6121                         pp_support_state = PP_STATE_SUPPORT_LS;
6122                         pp_state = PP_STATE_LS;
6123                 }
6124
6125                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6126                         pp_support_state |= PP_STATE_SUPPORT_CG;
6127                         pp_state |= PP_STATE_CG;
6128                 }
6129
6130                 if (state == AMD_CG_STATE_UNGATE)
6131                         pp_state = 0;
6132
6133                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6134                                 PP_BLOCK_GFX_MG,
6135                                 pp_support_state,
6136                                 pp_state);
6137                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6138                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6139         }
6140
6141         return 0;
6142 }
6143
/* Program gfx clockgating on Polaris-family parts (and VegaM) by sending
 * one SMU clockgating message per gfx sub-block (CG, 3D, MG, RLC, CP).
 * For each sub-block the supported-state mask is derived from
 * adev->cg_flags, and the requested state is forced to 0 when ungating.
 *
 * NOTE(review): pp_state/pp_support_state are not reset at the start of
 * the CG/3D/MG sections, so a section that supports only the CG flag
 * (without the matching LS flag) inherits the LS bits computed by the
 * previous section — presumably benign given the support mask, but worth
 * confirming against the SMU message definitions.
 */
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
                                          enum amd_clockgating_state state)
{

        uint32_t msg_id, pp_state = 0;
        uint32_t pp_support_state = 0;

        /* coarse grain clockgating (CGCG) / light sleep (CGLS) */
        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
                        pp_support_state = PP_STATE_SUPPORT_LS;
                        pp_state = PP_STATE_LS;
                }
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
                        pp_support_state |= PP_STATE_SUPPORT_CG;
                        pp_state |= PP_STATE_CG;
                }
                if (state == AMD_CG_STATE_UNGATE)
                        pp_state = 0;

                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
                                PP_BLOCK_GFX_CG,
                                pp_support_state,
                                pp_state);
                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
        }

        /* 3D pipe coarse grain clockgating / light sleep */
        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
                        pp_support_state = PP_STATE_SUPPORT_LS;
                        pp_state = PP_STATE_LS;
                }
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
                        pp_support_state |= PP_STATE_SUPPORT_CG;
                        pp_state |= PP_STATE_CG;
                }
                if (state == AMD_CG_STATE_UNGATE)
                        pp_state = 0;

                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
                                PP_BLOCK_GFX_3D,
                                pp_support_state,
                                pp_state);
                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
        }

        /* medium grain clockgating (MGCG) / medium grain light sleep (MGLS) */
        if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
                        pp_support_state = PP_STATE_SUPPORT_LS;
                        pp_state = PP_STATE_LS;
                }

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
                        pp_support_state |= PP_STATE_SUPPORT_CG;
                        pp_state |= PP_STATE_CG;
                }

                if (state == AMD_CG_STATE_UNGATE)
                        pp_state = 0;

                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
                                PP_BLOCK_GFX_MG,
                                pp_support_state,
                                pp_state);
                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
        }

        /* RLC light sleep only (no CG variant) */
        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
                pp_support_state = PP_STATE_SUPPORT_LS;

                if (state == AMD_CG_STATE_UNGATE)
                        pp_state = 0;
                else
                        pp_state = PP_STATE_LS;

                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
                                PP_BLOCK_GFX_RLC,
                                pp_support_state,
                                pp_state);
                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
        }

        /* CP light sleep only */
        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
                pp_support_state = PP_STATE_SUPPORT_LS;

                if (state == AMD_CG_STATE_UNGATE)
                        pp_state = 0;
                else
                        pp_state = PP_STATE_LS;
                msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
                        PP_BLOCK_GFX_CP,
                        pp_support_state,
                        pp_state);
                if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
                        amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
        }

        return 0;
}
6246
6247 static int gfx_v8_0_set_clockgating_state(void *handle,
6248                                           enum amd_clockgating_state state)
6249 {
6250         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6251
6252         if (amdgpu_sriov_vf(adev))
6253                 return 0;
6254
6255         switch (adev->asic_type) {
6256         case CHIP_FIJI:
6257         case CHIP_CARRIZO:
6258         case CHIP_STONEY:
6259                 gfx_v8_0_update_gfx_clock_gating(adev,
6260                                                  state == AMD_CG_STATE_GATE);
6261                 break;
6262         case CHIP_TONGA:
6263                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6264                 break;
6265         case CHIP_POLARIS10:
6266         case CHIP_POLARIS11:
6267         case CHIP_POLARIS12:
6268         case CHIP_VEGAM:
6269                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6270                 break;
6271         default:
6272                 break;
6273         }
6274         return 0;
6275 }
6276
6277 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6278 {
6279         return ring->adev->wb.wb[ring->rptr_offs];
6280 }
6281
6282 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6283 {
6284         struct amdgpu_device *adev = ring->adev;
6285
6286         if (ring->use_doorbell)
6287                 /* XXX check if swapping is necessary on BE */
6288                 return ring->adev->wb.wb[ring->wptr_offs];
6289         else
6290                 return RREG32(mmCP_RB0_WPTR);
6291 }
6292
6293 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6294 {
6295         struct amdgpu_device *adev = ring->adev;
6296
6297         if (ring->use_doorbell) {
6298                 /* XXX check if swapping is necessary on BE */
6299                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6300                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6301         } else {
6302                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6303                 (void)RREG32(mmCP_RB0_WPTR);
6304         }
6305 }
6306
/* Emit an HDP flush for this ring: a WAIT_REG_MEM in write-then-wait
 * mode that writes the per-engine mask to GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until the masked value matches.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
        u32 ref_and_mask, reg_mem_engine;

        /* Pick the GPU_HDP_FLUSH_DONE bit that belongs to this ring's
         * CP engine/pipe so we only wait on our own flush. */
        if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
            (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
                switch (ring->me) {
                case 1:
                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
                        break;
                case 2:
                        ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
                        break;
                default:
                        return;
                }
                reg_mem_engine = 0;
        } else {
                ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
                reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
                                 WAIT_REG_MEM_FUNCTION(3) |  /* == */
                                 reg_mem_engine));
        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
        amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
        amdgpu_ring_write(ring, ref_and_mask); /* reference value */
        amdgpu_ring_write(ring, ref_and_mask); /* compare mask */
        amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6339
/* Emit a VGT flush: a VS partial flush event followed by a VGT flush
 * event, as two EVENT_WRITE packets.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
                EVENT_INDEX(4));

        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
        amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
                EVENT_INDEX(0));
}
6350
/* Emit an indirect buffer on the gfx ring.  CE IBs are emitted as
 * INDIRECT_BUFFER_CONST, DE IBs as INDIRECT_BUFFER; the control DW
 * carries the IB length in DWs and the VMID in bits 24+.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
                                      struct amdgpu_ib *ib,
                                      unsigned vmid, bool ctx_switch)
{
        u32 header, control = 0;

        if (ib->flags & AMDGPU_IB_FLAG_CE)
                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        else
                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

        control |= ib->length_dw | (vmid << 24);

        /* Under SR-IOV, a preemptible DE IB gets the preemption-enable
         * bit and DE metadata emitted ahead of it. */
        if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
                control |= INDIRECT_BUFFER_PRE_ENB(1);

                if (!(ib->flags & AMDGPU_IB_FLAG_CE))
                        gfx_v8_0_ring_emit_de_meta(ring);
        }

        amdgpu_ring_write(ring, header);
        amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 0) |
#endif
                          (ib->gpu_addr & 0xFFFFFFFC));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        amdgpu_ring_write(ring, control);
}
6380
/* Emit an indirect buffer on a compute ring: INDIRECT_BUFFER packet
 * with the valid bit, IB length in DWs, and VMID in the control DW.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
                                          struct amdgpu_ib *ib,
                                          unsigned vmid, bool ctx_switch)
{
        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

        amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
                                (2 << 0) |
#endif
                                (ib->gpu_addr & 0xFFFFFFFC));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        amdgpu_ring_write(ring, control);
}
6396
/* Emit a fence on the gfx ring: an EVENT_WRITE_EOP that flushes and
 * invalidates caches, then writes seq (32 or 64 bit depending on
 * AMDGPU_FENCE_FLAG_64BIT) to addr and optionally raises an interrupt
 * (AMDGPU_FENCE_FLAG_INT).
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* EVENT_WRITE_EOP - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        /* DATA_SEL: 2 = 64-bit fence write, 1 = 32-bit;
         * INT_SEL: 2 = interrupt after the write, 0 = no interrupt */
        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
                          DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));

}
6417
/* Make the ring wait until its fence memory reaches the latest synced
 * sequence number: WAIT_REG_MEM polling memory for equality.  Gfx rings
 * wait on the PFP, other rings on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;

        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
                                 WAIT_REG_MEM_FUNCTION(3) | /* equal */
                                 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
        amdgpu_ring_write(ring, seq); /* reference value */
        amdgpu_ring_write(ring, 0xffffffff); /* mask: compare all bits */
        amdgpu_ring_write(ring, 4); /* poll interval */
}
6434
/* Flush the GPU TLB for vmid: emit the gmc flush, wait for
 * VM_INVALIDATE_REQUEST to read back as 0, and on gfx rings resync the
 * PFP with the ME so the PFP does not prefetch stale data.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                        unsigned vmid, uint64_t pd_addr)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

        amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

        /* wait for the invalidate to complete */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
                                 WAIT_REG_MEM_ENGINE(0))); /* me */
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 0); /* ref */
        amdgpu_ring_write(ring, 0); /* mask */
        amdgpu_ring_write(ring, 0x20); /* poll interval */

        /* compute doesn't have PFP */
        if (usepfp) {
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                amdgpu_ring_write(ring, 0x0);
        }
}
6460
6461 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6462 {
6463         return ring->adev->wb.wb[ring->wptr_offs];
6464 }
6465
6466 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6467 {
6468         struct amdgpu_device *adev = ring->adev;
6469
6470         /* XXX check if swapping is necessary on BE */
6471         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6472         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6473 }
6474
6475 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6476                                            bool acquire)
6477 {
6478         struct amdgpu_device *adev = ring->adev;
6479         int pipe_num, tmp, reg;
6480         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6481
6482         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6483
6484         /* first me only has 2 entries, GFX and HP3D */
6485         if (ring->me > 0)
6486                 pipe_num -= 2;
6487
6488         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6489         tmp = RREG32(reg);
6490         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6491         WREG32(reg, tmp);
6492 }
6493
/* Track which pipes currently hold a resource reservation in
 * pipe_reserve_bitmap and rescale every gfx/compute pipe's SPI resource
 * percentage: with no reservations outstanding all pipes get the full
 * allocation, otherwise only reserved pipes keep theirs.
 */
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
                                            struct amdgpu_ring *ring,
                                            bool acquire)
{
        int i, pipe;
        bool reserve;
        struct amdgpu_ring *iring;

        mutex_lock(&adev->gfx.pipe_reserve_mutex);
        /* record (or drop) this ring's reservation in the bitmap */
        pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
        if (acquire)
                set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
        else
                clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

        if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
                /* Clear all reservations - everyone reacquires all resources */
                for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
                        gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
                                                       true);

                for (i = 0; i < adev->gfx.num_compute_rings; ++i)
                        gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
                                                       true);
        } else {
                /* Lower all pipes without a current reservation */
                for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
                        iring = &adev->gfx.gfx_ring[i];
                        pipe = amdgpu_gfx_queue_to_bit(adev,
                                                       iring->me,
                                                       iring->pipe,
                                                       0);
                        reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
                        gfx_v8_0_ring_set_pipe_percent(iring, reserve);
                }

                for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
                        iring = &adev->gfx.compute_ring[i];
                        pipe = amdgpu_gfx_queue_to_bit(adev,
                                                       iring->me,
                                                       iring->pipe,
                                                       0);
                        reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
                        gfx_v8_0_ring_set_pipe_percent(iring, reserve);
                }
        }

        mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
6543
6544 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6545                                       struct amdgpu_ring *ring,
6546                                       bool acquire)
6547 {
6548         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6549         uint32_t queue_priority = acquire ? 0xf : 0x0;
6550
6551         mutex_lock(&adev->srbm_mutex);
6552         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6553
6554         WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6555         WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6556
6557         vi_srbm_select(adev, 0, 0, 0, 0);
6558         mutex_unlock(&adev->srbm_mutex);
6559 }
6560 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6561                                                enum drm_sched_priority priority)
6562 {
6563         struct amdgpu_device *adev = ring->adev;
6564         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6565
6566         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6567                 return;
6568
6569         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6570         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6571 }
6572
/* Emit a fence on a compute ring: a RELEASE_MEM that flushes and
 * invalidates caches, writes seq (32 or 64 bit per
 * AMDGPU_FENCE_FLAG_64BIT) to addr, and optionally raises an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
                                             u64 addr, u64 seq,
                                             unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* RELEASE_MEM - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        /* DATA_SEL: 2 = 64-bit fence write, 1 = 32-bit;
         * INT_SEL: 2 = interrupt after the write, 0 = no interrupt */
        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));
}
6593
/* Emit a fence on the KIQ ring: write the 32-bit seq to addr via
 * WRITE_DATA, then optionally poke CPC_INT_STATUS to raise an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned int flags)
{
        /* we only allocate 32bit for each seq wb address */
        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        /* write fence seq to the "addr" */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));

        if (flags & AMDGPU_FENCE_FLAG_INT) {
                /* set register to trigger INT */
                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
                amdgpu_ring_write(ring, mmCPC_INT_STATUS);
                amdgpu_ring_write(ring, 0);
                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
        }
}
6618
/* Emit a SWITCH_BUFFER packet (payload 0). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
        amdgpu_ring_write(ring, 0);
}
6624
/* Emit a CONTEXT_CONTROL packet.  The dw2 load bits depend on whether a
 * context switch is pending and whether a preamble IB is present; under
 * SR-IOV, CE metadata is emitted first.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
        uint32_t dw2 = 0;

        if (amdgpu_sriov_vf(ring->adev))
                gfx_v8_0_ring_emit_ce_meta(ring);

        dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
                gfx_v8_0_ring_emit_vgt_flush(ring);
                /* set load_global_config & load_global_uconfig */
                dw2 |= 0x8001;
                /* set load_cs_sh_regs */
                dw2 |= 0x01000000;
                /* set load_per_context_state & load_gfx_sh_regs for GFX */
                dw2 |= 0x10002;

                /* set load_ce_ram if preamble presented */
                if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
                        dw2 |= 0x10000000;
        } else {
                /* still load_ce_ram if this is the first time preamble presented
                 * although there is no context switch happens.
                 */
                if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
                        dw2 |= 0x10000000;
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, dw2);
        amdgpu_ring_write(ring, 0);
}
6657
/* Emit a COND_EXEC packet whose DW count is a placeholder (0x55aa55aa).
 * Returns the ring offset of that placeholder so
 * gfx_v8_0_ring_emit_patch_cond_exec() can back-patch the real count.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
        unsigned ret;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
        amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
        ret = ring->wptr & ring->buf_mask;
        amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
        return ret;
}
6670
6671 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6672 {
6673         unsigned cur;
6674
6675         BUG_ON(offset > ring->buf_mask);
6676         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6677
6678         cur = (ring->wptr & ring->buf_mask) - 1;
6679         if (likely(cur > offset))
6680                 ring->ring[offset] = cur - offset;
6681         else
6682                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6683 }
6684
/* Emit a COPY_DATA packet that copies a register's value into the
 * reg_val_offs writeback slot so the host can read it back.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
        struct amdgpu_device *adev = ring->adev;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 0 |     /* src: register*/
                                (5 << 8) |      /* dst: memory */
                                (1 << 20));     /* write confirm */
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
}
6700
6701 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6702                                   uint32_t val)
6703 {
6704         uint32_t cmd;
6705
6706         switch (ring->funcs->type) {
6707         case AMDGPU_RING_TYPE_GFX:
6708                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6709                 break;
6710         case AMDGPU_RING_TYPE_KIQ:
6711                 cmd = 1 << 16; /* no inc addr */
6712                 break;
6713         default:
6714                 cmd = WR_CONFIRM;
6715                 break;
6716         }
6717
6718         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6719         amdgpu_ring_write(ring, cmd);
6720         amdgpu_ring_write(ring, reg);
6721         amdgpu_ring_write(ring, 0);
6722         amdgpu_ring_write(ring, val);
6723 }
6724
/* Enable/disable the EOP (timestamp) interrupt on the gfx ring 0
 * interrupt control register. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
                                                 enum amdgpu_interrupt_state state)
{
        WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6731
6732 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6733                                                      int me, int pipe,
6734                                                      enum amdgpu_interrupt_state state)
6735 {
6736         u32 mec_int_cntl, mec_int_cntl_reg;
6737
6738         /*
6739          * amdgpu controls only the first MEC. That's why this function only
6740          * handles the setting of interrupts for this specific MEC. All other
6741          * pipes' interrupts are set by amdkfd.
6742          */
6743
6744         if (me == 1) {
6745                 switch (pipe) {
6746                 case 0:
6747                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6748                         break;
6749                 case 1:
6750                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6751                         break;
6752                 case 2:
6753                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6754                         break;
6755                 case 3:
6756                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6757                         break;
6758                 default:
6759                         DRM_DEBUG("invalid pipe %d\n", pipe);
6760                         return;
6761                 }
6762         } else {
6763                 DRM_DEBUG("invalid me %d\n", me);
6764                 return;
6765         }
6766
6767         switch (state) {
6768         case AMDGPU_IRQ_STATE_DISABLE:
6769                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6770                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6771                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6772                 break;
6773         case AMDGPU_IRQ_STATE_ENABLE:
6774                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6775                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6776                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6777                 break;
6778         default:
6779                 break;
6780         }
6781 }
6782
/* Enable/disable the privileged-register fault interrupt on ring 0. */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
                                             struct amdgpu_irq_src *source,
                                             unsigned type,
                                             enum amdgpu_interrupt_state state)
{
        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

        return 0;
}
6793
/* Enable/disable the privileged-instruction fault interrupt on ring 0. */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
                                              struct amdgpu_irq_src *source,
                                              unsigned type,
                                              enum amdgpu_interrupt_state state)
{
        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

        return 0;
}
6804
/* Route an EOP interrupt enable/disable request to the matching gfx
 * ring or compute (me, pipe) pair; unknown types are ignored.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
                                            struct amdgpu_irq_src *src,
                                            unsigned type,
                                            enum amdgpu_interrupt_state state)
{
        switch (type) {
        case AMDGPU_CP_IRQ_GFX_EOP:
                gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
                break;
        default:
                break;
        }
        return 0;
}
6843
6844 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6845                                          struct amdgpu_irq_src *source,
6846                                          unsigned int type,
6847                                          enum amdgpu_interrupt_state state)
6848 {
6849         int enable_flag;
6850
6851         switch (state) {
6852         case AMDGPU_IRQ_STATE_DISABLE:
6853                 enable_flag = 0;
6854                 break;
6855
6856         case AMDGPU_IRQ_STATE_ENABLE:
6857                 enable_flag = 1;
6858                 break;
6859
6860         default:
6861                 return -EINVAL;
6862         }
6863
6864         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6865         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6866         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6867         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6868         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6869         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6870                      enable_flag);
6871         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6872                      enable_flag);
6873         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6874                      enable_flag);
6875         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6876                      enable_flag);
6877         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6878                      enable_flag);
6879         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6880                      enable_flag);
6881         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6882                      enable_flag);
6883         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6884                      enable_flag);
6885
6886         return 0;
6887 }
6888
6889 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6890                                      struct amdgpu_irq_src *source,
6891                                      unsigned int type,
6892                                      enum amdgpu_interrupt_state state)
6893 {
6894         int enable_flag;
6895
6896         switch (state) {
6897         case AMDGPU_IRQ_STATE_DISABLE:
6898                 enable_flag = 1;
6899                 break;
6900
6901         case AMDGPU_IRQ_STATE_ENABLE:
6902                 enable_flag = 0;
6903                 break;
6904
6905         default:
6906                 return -EINVAL;
6907         }
6908
6909         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6910                      enable_flag);
6911
6912         return 0;
6913 }
6914
6915 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6916                             struct amdgpu_irq_src *source,
6917                             struct amdgpu_iv_entry *entry)
6918 {
6919         int i;
6920         u8 me_id, pipe_id, queue_id;
6921         struct amdgpu_ring *ring;
6922
6923         DRM_DEBUG("IH: CP EOP\n");
6924         me_id = (entry->ring_id & 0x0c) >> 2;
6925         pipe_id = (entry->ring_id & 0x03) >> 0;
6926         queue_id = (entry->ring_id & 0x70) >> 4;
6927
6928         switch (me_id) {
6929         case 0:
6930                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6931                 break;
6932         case 1:
6933         case 2:
6934                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6935                         ring = &adev->gfx.compute_ring[i];
6936                         /* Per-queue interrupt is supported for MEC starting from VI.
6937                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6938                           */
6939                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6940                                 amdgpu_fence_process(ring);
6941                 }
6942                 break;
6943         }
6944         return 0;
6945 }
6946
/* Privileged-register fault handler: a command stream touched a privileged
 * register; log it and schedule the device reset work.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");
        schedule_work(&adev->reset_work);
        return 0;
}
6955
/* Privileged-instruction fault handler: a command stream executed an
 * illegal instruction; log it and schedule the device reset work.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");
        schedule_work(&adev->reset_work);
        return 0;
}
6964
/* CP EDC/ECC error interrupt handler.  Nothing is recovered here yet;
 * the event is only reported.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* Kernel log messages must be newline-terminated; DRM_ERROR does
	 * not append one (fixes a message that could be merged with the
	 * next printk line).
	 */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6972
6973 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6974 {
6975         u32 enc, se_id, sh_id, cu_id;
6976         char type[20];
6977         int sq_edc_source = -1;
6978
6979         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6980         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6981
6982         switch (enc) {
6983                 case 0:
6984                         DRM_INFO("SQ general purpose intr detected:"
6985                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6986                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6987                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6988                                         "wlt %d, thread_trace %d.\n",
6989                                         se_id,
6990                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6991                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6992                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6993                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6994                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6995                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6996                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6997                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6998                                         );
6999                         break;
7000                 case 1:
7001                 case 2:
7002
7003                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
7004                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
7005
7006                         /*
7007                          * This function can be called either directly from ISR
7008                          * or from BH in which case we can access SQ_EDC_INFO
7009                          * instance
7010                          */
7011                         if (in_task()) {
7012                                 mutex_lock(&adev->grbm_idx_mutex);
7013                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
7014
7015                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
7016
7017                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7018                                 mutex_unlock(&adev->grbm_idx_mutex);
7019                         }
7020
7021                         if (enc == 1)
7022                                 sprintf(type, "instruction intr");
7023                         else
7024                                 sprintf(type, "EDC/ECC error");
7025
7026                         DRM_INFO(
7027                                 "SQ %s detected: "
7028                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
7029                                         "trap %s, sq_ed_info.source %s.\n",
7030                                         type, se_id, sh_id, cu_id,
7031                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
7032                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
7033                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
7034                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
7035                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
7036                                 );
7037                         break;
7038                 default:
7039                         DRM_ERROR("SQ invalid encoding type\n.");
7040         }
7041 }
7042
7043 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
7044 {
7045
7046         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
7047         struct sq_work *sq_work = container_of(work, struct sq_work, work);
7048
7049         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
7050 }
7051
7052 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
7053                            struct amdgpu_irq_src *source,
7054                            struct amdgpu_iv_entry *entry)
7055 {
7056         unsigned ih_data = entry->src_data[0];
7057
7058         /*
7059          * Try to submit work so SQ_EDC_INFO can be accessed from
7060          * BH. If previous work submission hasn't finished yet
7061          * just print whatever info is possible directly from the ISR.
7062          */
7063         if (work_pending(&adev->gfx.sq_work.work)) {
7064                 gfx_v8_0_parse_sq_irq(adev, ih_data);
7065         } else {
7066                 adev->gfx.sq_work.ih_data = ih_data;
7067                 schedule_work(&adev->gfx.sq_work.work);
7068         }
7069
7070         return 0;
7071 }
7072
7073 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
7074                                             struct amdgpu_irq_src *src,
7075                                             unsigned int type,
7076                                             enum amdgpu_interrupt_state state)
7077 {
7078         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7079
7080         switch (type) {
7081         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
7082                 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
7083                              state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7084                 if (ring->me == 1)
7085                         WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
7086                                      ring->pipe,
7087                                      GENERIC2_INT_ENABLE,
7088                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7089                 else
7090                         WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
7091                                      ring->pipe,
7092                                      GENERIC2_INT_ENABLE,
7093                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
7094                 break;
7095         default:
7096                 BUG(); /* kiq only support GENERIC2_INT now */
7097                 break;
7098         }
7099         return 0;
7100 }
7101
7102 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7103                             struct amdgpu_irq_src *source,
7104                             struct amdgpu_iv_entry *entry)
7105 {
7106         u8 me_id, pipe_id, queue_id;
7107         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
7108
7109         me_id = (entry->ring_id & 0x0c) >> 2;
7110         pipe_id = (entry->ring_id & 0x03) >> 0;
7111         queue_id = (entry->ring_id & 0x70) >> 4;
7112         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7113                    me_id, pipe_id, queue_id);
7114
7115         amdgpu_fence_process(ring);
7116         return 0;
7117 }
7118
/* IP-block lifecycle callbacks for the GFX v8 block (init/fini, suspend/
 * resume, idle checks, soft reset and clock/power gating control).
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
        .name = "gfx_v8_0",
        .early_init = gfx_v8_0_early_init,
        .late_init = gfx_v8_0_late_init,
        .sw_init = gfx_v8_0_sw_init,
        .sw_fini = gfx_v8_0_sw_fini,
        .hw_init = gfx_v8_0_hw_init,
        .hw_fini = gfx_v8_0_hw_fini,
        .suspend = gfx_v8_0_suspend,
        .resume = gfx_v8_0_resume,
        .is_idle = gfx_v8_0_is_idle,
        .wait_for_idle = gfx_v8_0_wait_for_idle,
        .check_soft_reset = gfx_v8_0_check_soft_reset,
        .pre_soft_reset = gfx_v8_0_pre_soft_reset,
        .soft_reset = gfx_v8_0_soft_reset,
        .post_soft_reset = gfx_v8_0_post_soft_reset,
        .set_clockgating_state = gfx_v8_0_set_clockgating_state,
        .set_powergating_state = gfx_v8_0_set_powergating_state,
        .get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
7139
/* Ring callbacks for the graphics (GFX) ring.  emit_frame_size is the
 * worst-case dword budget reserved per frame for the packets listed in
 * the inline comments below.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
        .type = AMDGPU_RING_TYPE_GFX,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
        .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
        .emit_frame_size = /* maximum 215dw if count 16 IBs in */
                5 +  /* COND_EXEC */
                7 +  /* PIPELINE_SYNC */
                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
                8 +  /* FENCE for VM_FLUSH */
                20 + /* GDS switch */
                4 + /* double SWITCH_BUFFER,
                       the first COND_EXEC jump to the place just
                           prior to this double SWITCH_BUFFER  */
                5 + /* COND_EXEC */
                7 +      /*     HDP_flush */
                4 +      /*     VGT_flush */
                14 + /* CE_META */
                31 + /* DE_META */
                3 + /* CNTX_CTRL */
                5 + /* HDP_INVL */
                8 + 8 + /* FENCE x2 */
                2, /* SWITCH_BUFFER */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_switch_buffer = gfx_v8_ring_emit_sb,
        .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
        .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7183
/* Ring callbacks for compute (MEC) rings; shares most emit helpers with
 * the gfx ring but uses compute-specific wptr handling and fences.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
        .type = AMDGPU_RING_TYPE_COMPUTE,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .set_priority = gfx_v8_0_ring_set_priority_compute,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7213
/* Ring callbacks for the kernel interface queue (KIQ); adds register
 * read emission (emit_rreg) and a KIQ-specific fence.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
        .type = AMDGPU_RING_TYPE_KIQ,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_rreg = gfx_v8_0_ring_emit_rreg,
        .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7239
7240 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7241 {
7242         int i;
7243
7244         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7245
7246         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7247                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7248
7249         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7250                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7251 }
7252
/* Interrupt source dispatch tables: .set toggles the source's enable
 * state, .process handles a delivered IH vector entry.
 */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
        .set = gfx_v8_0_set_eop_interrupt_state,
        .process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
        .set = gfx_v8_0_set_priv_reg_fault_state,
        .process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
        .set = gfx_v8_0_set_priv_inst_fault_state,
        .process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
        .set = gfx_v8_0_kiq_set_interrupt_state,
        .process = gfx_v8_0_kiq_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
        .set = gfx_v8_0_set_cp_ecc_int_state,
        .process = gfx_v8_0_cp_ecc_error_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
        .set = gfx_v8_0_set_sq_int_state,
        .process = gfx_v8_0_sq_irq,
};
7282
/* Register the GFX interrupt sources (EOP, privileged faults, KIQ,
 * CP ECC and SQ) with their dispatch tables and type counts.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
        adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

        adev->gfx.priv_reg_irq.num_types = 1;
        adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

        adev->gfx.priv_inst_irq.num_types = 1;
        adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

        adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
        adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;

        adev->gfx.cp_ecc_error_irq.num_types = 1;
        adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

        adev->gfx.sq_irq.num_types = 1;
        adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
7303
/* Install the RLC callback table (iceland_rlc_funcs is defined earlier
 * in this file and shared across the v8 variants).
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
        adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7308
/* Initialize GDS (global data share), GWS and OA sizes and their
 * gfx/CS partition splits based on the hardware-reported GDS size.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
        /* init ASIC gds info */
        adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
        adev->gds.gws.total_size = 64;
        adev->gds.oa.total_size = 16;

        /* Smaller 64KB GDS parts get 4KB partitions; larger parts use
         * 1KB partitions with bigger GWS/OA splits. */
        if (adev->gds.mem.total_size == 64 * 1024) {
                adev->gds.mem.gfx_partition_size = 4096;
                adev->gds.mem.cs_partition_size = 4096;

                adev->gds.gws.gfx_partition_size = 4;
                adev->gds.gws.cs_partition_size = 4;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 1;
        } else {
                adev->gds.mem.gfx_partition_size = 1024;
                adev->gds.mem.cs_partition_size = 1024;

                adev->gds.gws.gfx_partition_size = 16;
                adev->gds.gws.cs_partition_size = 16;

                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 4;
        }
}
7336
7337 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7338                                                  u32 bitmap)
7339 {
7340         u32 data;
7341
7342         if (!bitmap)
7343                 return;
7344
7345         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7346         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7347
7348         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7349 }
7350
7351 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7352 {
7353         u32 data, mask;
7354
7355         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7356                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7357
7358         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7359
7360         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7361 }
7362
/* Walk every SE/SH, apply the user CU-disable masks, and populate
 * adev->gfx.cu_info with per-SH active-CU bitmaps, always-on CU masks
 * and per-CU capability constants.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
        int i, j, k, counter, active_cu_number = 0;
        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
        struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
        unsigned disable_masks[4 * 2];
        u32 ao_cu_num;

        memset(cu_info, 0, sizeof(*cu_info));

        /* APUs cap the always-on count at 2 CUs per SH; the reason is not
         * visible in this file (presumably power related -- TODO confirm). */
        if (adev->flags & AMD_IS_APU)
                ao_cu_num = 2;
        else
                ao_cu_num = adev->gfx.config.max_cu_per_sh;

        /* Parse the user-supplied disable-CU masks for up to 4 SEs x 2 SHs. */
        amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
                        /* Point register accesses at this SE/SH instance. */
                        gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
                        if (i < 4 && j < 2)
                                gfx_v8_0_set_user_cu_inactive_bitmap(
                                        adev, disable_masks[i * 2 + j]);
                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
                        cu_info->bitmap[i][j] = bitmap;

                        /* Count active CUs; the first ao_cu_num of them are
                         * marked always-on. */
                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
                                if (bitmap & mask) {
                                        if (counter < ao_cu_num)
                                                ao_bitmap |= mask;
                                        counter ++;
                                }
                                mask <<= 1;
                        }
                        active_cu_number += counter;
                        /* ao_cu_mask packs 8 bits per SH for the first
                         * 2 SEs x 2 SHs only. */
                        if (i < 2 && j < 2)
                                ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
                        cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
                }
        }
        /* Restore broadcast mode so later writes reach all SEs/SHs. */
        gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        cu_info->number = active_cu_number;
        cu_info->ao_cu_mask = ao_cu_mask;
        cu_info->simd_per_cu = NUM_SIMD_PER_CU;
        cu_info->max_waves_per_simd = 10;
        cu_info->max_scratch_slots_per_cu = 32;
        cu_info->wave_front_size = 64;
        cu_info->lds_size = 64;
}
7418
/* GFX 8.0 IP block descriptor. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 0,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};

/* GFX 8.1 IP block descriptor; shares the same callbacks as 8.0. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 8,
        .minor = 1,
        .rev = 0,
        .funcs = &gfx_v8_0_ip_funcs,
};
7436
/* Emit a WRITE_DATA packet that stores zeroed CE metadata into the CSA.
 * NOTE(review): presumably part of the preemption state setup -- confirm
 * against the CSA layout in vi_structs.h.
 */
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
        uint64_t ce_payload_addr;
        int cnt_ce;
        union {
                struct vi_ce_ib_state regular;
                struct vi_ce_ib_state_chained_ib chained;
        } ce_payload = {};

        /* The CSA payload layout differs when chained IBs are supported. */
        if (ring->adev->virt.chained_ib_support) {
                ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
                        offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
                cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
        } else {
                ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
                        offsetof(struct vi_gfx_meta_data, ce_payload);
                cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
        }

        /* cnt_ce counts the control dword, the two address dwords and the
         * payload dwords (minus the packet header bookkeeping). */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
                                WRITE_DATA_DST_SEL(8) |
                                WR_CONFIRM) |
                                WRITE_DATA_CACHE_POLICY(0));
        amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
        amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
        amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
7465
/* Emit a WRITE_DATA packet that stores DE metadata (including the GDS
 * backup address, placed 4KB past the CSA base) into the CSA.
 */
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
        uint64_t de_payload_addr, gds_addr, csa_addr;
        int cnt_de;
        union {
                struct vi_de_ib_state regular;
                struct vi_de_ib_state_chained_ib chained;
        } de_payload = {};

        csa_addr = amdgpu_csa_vaddr(ring->adev);
        /* GDS backup area lives one page past the CSA base. */
        gds_addr = csa_addr + 4096;
        /* The CSA payload layout differs when chained IBs are supported. */
        if (ring->adev->virt.chained_ib_support) {
                de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
                de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
                de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
                cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
        } else {
                de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
                de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
                de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
                cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
        }

        /* cnt_de counts the control dword, the two address dwords and the
         * payload dwords (minus the packet header bookkeeping). */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
                                WRITE_DATA_DST_SEL(8) |
                                WR_CONFIRM) |
                                WRITE_DATA_CACHE_POLICY(0));
        amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
        amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
        amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}