27b3de44add7a08c5f839dd93c5e8014f2093376
[linux-block.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
33
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
36
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
39
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
42
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
53 #define GFX8_NUM_GFX_RINGS     1
54 #define GFX8_NUM_COMPUTE_RINGS 8
55
56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
60
61 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
62 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
63 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
64 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
65 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
66 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
67 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
68 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
69 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
70
71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
77
78 /* BPM SERDES CMD */
79 #define SET_BPM_SERDES_CMD    1
80 #define CLE_BPM_SERDES_CMD    0
81
82 /* BPM Register Address*/
83 enum {
84         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
85         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
86         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
87         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
88         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
89         BPM_REG_FGCG_MAX
90 };
91
92 #define RLC_FormatDirectRegListLength        14
93
94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
100
101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
126
127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
133
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140
141 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
142 {
143         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
144         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
145         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
146         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
147         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
148         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
149         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
150         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
151         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
152         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
153         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
154         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
155         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
156         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
157         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
158         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
159 };
160
161 static const u32 golden_settings_tonga_a11[] =
162 {
163         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
164         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
165         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
166         mmGB_GPU_ID, 0x0000000f, 0x00000000,
167         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
168         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
169         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
170         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
171         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
172         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
173         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
174         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
175         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
176         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
177         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
178 };
179
180 static const u32 tonga_golden_common_all[] =
181 {
182         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
183         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
184         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
185         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
186         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
187         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
188         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
189         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
190 };
191
192 static const u32 tonga_mgcg_cgcg_init[] =
193 {
194         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
195         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
196         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
198         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
199         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
200         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
201         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
202         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
203         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
204         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
205         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
206         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
207         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
208         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
209         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
210         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
211         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
212         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
213         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
214         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
215         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
216         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
217         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
218         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
219         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
220         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
221         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
222         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
223         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
224         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
225         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
226         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
227         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
228         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
229         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
230         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
231         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
232         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
233         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
234         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
235         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
236         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
237         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
238         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
239         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
240         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
241         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
242         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
243         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
244         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
245         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
246         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
247         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
248         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
249         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
250         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
251         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
252         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
253         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
254         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
255         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
256         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
257         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
258         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
259         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
260         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
261         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
262         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
263         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
264         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
265         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
266         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
267         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
268         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
269 };
270
271 static const u32 golden_settings_polaris11_a11[] =
272 {
273         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
274         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
275         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
276         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
277         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
278         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
279         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
280         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
281         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
282         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
283         mmSQ_CONFIG, 0x07f80000, 0x01180000,
284         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
285         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
286         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
287         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
288         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
289         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
290 };
291
292 static const u32 polaris11_golden_common_all[] =
293 {
294         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
295         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
296         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
297         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
298         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
299         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
300 };
301
302 static const u32 golden_settings_polaris10_a11[] =
303 {
304         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
305         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
306         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
307         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
308         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
309         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
310         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
311         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
312         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
313         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
314         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
315         mmSQ_CONFIG, 0x07f80000, 0x07180000,
316         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
317         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
318         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
319         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
320         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
321 };
322
323 static const u32 polaris10_golden_common_all[] =
324 {
325         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
326         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
327         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
328         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
329         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
330         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
331         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
332         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
333 };
334
335 static const u32 fiji_golden_common_all[] =
336 {
337         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
338         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
339         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
340         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
341         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
342         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
343         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
344         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
345         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
346         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
347 };
348
349 static const u32 golden_settings_fiji_a10[] =
350 {
351         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
352         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
353         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
354         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
355         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
356         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
357         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
358         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
359         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
360         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
361         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
362 };
363
364 static const u32 fiji_mgcg_cgcg_init[] =
365 {
366         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
367         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
368         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
369         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
370         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
371         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
372         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
373         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
374         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
375         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
376         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
377         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
378         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
379         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
380         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
381         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
382         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
383         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
384         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
385         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
386         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
387         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
388         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
389         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
390         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
391         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
392         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
393         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
394         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
395         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
396         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
397         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
398         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
399         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
400         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
401 };
402
403 static const u32 golden_settings_iceland_a11[] =
404 {
405         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
406         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
407         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
408         mmGB_GPU_ID, 0x0000000f, 0x00000000,
409         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
410         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
411         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
412         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
413         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
414         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
415         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
416         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
417         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
418         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
419         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
420         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
421 };
422
423 static const u32 iceland_golden_common_all[] =
424 {
425         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
426         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
427         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
428         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
429         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
430         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
431         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
432         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
433 };
434
435 static const u32 iceland_mgcg_cgcg_init[] =
436 {
437         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
438         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
439         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
442         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
443         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
444         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
446         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
448         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
452         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
455         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
456         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
457         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
458         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
459         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
460         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
461         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
462         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
463         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
464         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
465         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
466         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
467         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
468         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
469         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
470         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
471         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
472         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
473         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
474         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
475         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
476         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
477         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
478         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
479         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
480         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
481         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
482         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
483         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
484         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
485         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
486         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
487         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
488         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
489         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
490         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
491         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
492         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
493         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
494         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
495         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
496         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
497         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
498         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
499         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
500         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
501 };
502
503 static const u32 cz_golden_settings_a11[] =
504 {
505         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
506         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
507         mmGB_GPU_ID, 0x0000000f, 0x00000000,
508         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
509         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
510         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
511         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
512         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
513         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
514         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
515         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
516         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
517 };
518
519 static const u32 cz_golden_common_all[] =
520 {
521         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
522         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
523         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
524         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
525         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
526         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
527         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
528         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
529 };
530
531 static const u32 cz_mgcg_cgcg_init[] =
532 {
533         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
534         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
535         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
536         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
537         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
538         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
539         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
540         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
541         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
542         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
543         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
544         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
545         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
546         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
547         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
548         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
549         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
550         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
551         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
552         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
553         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
554         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
555         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
556         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
557         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
558         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
559         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
560         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
561         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
562         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
563         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
564         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
565         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
566         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
567         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
568         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
569         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
570         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
571         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
572         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
573         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
574         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
575         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
576         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
577         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
578         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
579         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
580         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
581         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
582         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
583         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
584         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
585         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
586         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
587         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
588         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
589         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
590         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
591         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
592         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
593         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
594         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
595         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
596         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
597         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
598         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
599         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
600         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
601         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
602         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
603         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
604         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
605         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
606         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
607         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
608 };
609
610 static const u32 stoney_golden_settings_a11[] =
611 {
612         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
613         mmGB_GPU_ID, 0x0000000f, 0x00000000,
614         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
615         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
616         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
617         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
618         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
619         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
620         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
621         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
622 };
623
/* Stoney common golden settings (raster/addressing/CU reservation),
 * same (reg, mask, value) triple format as the tables above. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
635
/* Stoney medium-grain / coarse-grain clockgating init values,
 * same (reg, mask, value) triple format as the tables above. */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
645
/* Forward declarations for helpers defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
652
/**
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 * @adev: amdgpu device pointer
 *
 * Applies the chip-specific (register, AND-mask, OR-value) tables defined
 * above via amdgpu_program_register_sequence().  Typically each ASIC gets
 * its clockgating init table, its golden settings and its common settings.
 * Unrecognized ASIC types are silently ignored.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		/* NOTE(review): SMC-side clock register write; exact meaning of
		 * 0x1C is vendor-defined — confirm against SMU docs. */
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific workaround for three known Polaris10 SKUs
		 * (matched by PCI revision + subsystem IDs): pushes two bytes
		 * over the ATOM BIOS I2C channel.  Presumably VRAM/VRM tuning —
		 * the device at I2C address 0x96 is not documented here. */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
740
741 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
742 {
743         int i;
744
745         adev->gfx.scratch.num_reg = 7;
746         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
747         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
748                 adev->gfx.scratch.free[i] = true;
749                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
750         }
751 }
752
/**
 * gfx_v8_0_ring_test_ring - smoke-test a ring by round-tripping a register write
 * @ring: the ring to exercise
 *
 * Seeds a scratch register with 0xCAFEDEAD, then submits a three-dword
 * SET_UCONFIG_REG packet that makes the CP write 0xDEADBEEF into the same
 * register, and polls until the value appears or adev->usec_timeout
 * microseconds elapse.
 *
 * Returns 0 on success, -EINVAL on timeout, or the error from scratch
 * allocation / ring locking.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed with a sentinel so a stale 0xDEADBEEF cannot fake success. */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* Ask the CP to write 0xDEADBEEF to the scratch register. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* Busy-poll the register until the CP's write lands or we time out. */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
796
/**
 * gfx_v8_0_ring_test_ib - test indirect buffer submission on a ring
 * @ring: the ring to exercise
 * @timeout: fence wait timeout in jiffies
 *
 * Like gfx_v8_0_ring_test_ring(), but the SET_UCONFIG_REG packet is
 * delivered through a small (256-byte) IB and completion is detected by
 * waiting on the scheduler fence rather than polling.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence did not signal in time,
 * -EINVAL if the CP did not perform the write, or a negative error from
 * scratch/IB allocation or the fence wait.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* Seed with a sentinel so a stale 0xDEADBEEF cannot fake success. */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* Three-dword packet: write 0xDEADBEEF to the scratch register. */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	r = fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	/* Unwind in reverse order of acquisition. */
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
852
853
854 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
855         release_firmware(adev->gfx.pfp_fw);
856         adev->gfx.pfp_fw = NULL;
857         release_firmware(adev->gfx.me_fw);
858         adev->gfx.me_fw = NULL;
859         release_firmware(adev->gfx.ce_fw);
860         adev->gfx.ce_fw = NULL;
861         release_firmware(adev->gfx.rlc_fw);
862         adev->gfx.rlc_fw = NULL;
863         release_firmware(adev->gfx.mec_fw);
864         adev->gfx.mec_fw = NULL;
865         if ((adev->asic_type != CHIP_STONEY) &&
866             (adev->asic_type != CHIP_TOPAZ))
867                 release_firmware(adev->gfx.mec2_fw);
868         adev->gfx.mec2_fw = NULL;
869
870         kfree(adev->gfx.rlc.register_list_format);
871 }
872
873 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
874 {
875         const char *chip_name;
876         char fw_name[30];
877         int err;
878         struct amdgpu_firmware_info *info = NULL;
879         const struct common_firmware_header *header = NULL;
880         const struct gfx_firmware_header_v1_0 *cp_hdr;
881         const struct rlc_firmware_header_v2_0 *rlc_hdr;
882         unsigned int *tmp = NULL, i;
883
884         DRM_DEBUG("\n");
885
886         switch (adev->asic_type) {
887         case CHIP_TOPAZ:
888                 chip_name = "topaz";
889                 break;
890         case CHIP_TONGA:
891                 chip_name = "tonga";
892                 break;
893         case CHIP_CARRIZO:
894                 chip_name = "carrizo";
895                 break;
896         case CHIP_FIJI:
897                 chip_name = "fiji";
898                 break;
899         case CHIP_POLARIS11:
900                 chip_name = "polaris11";
901                 break;
902         case CHIP_POLARIS10:
903                 chip_name = "polaris10";
904                 break;
905         case CHIP_STONEY:
906                 chip_name = "stoney";
907                 break;
908         default:
909                 BUG();
910         }
911
912         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
913         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
914         if (err)
915                 goto out;
916         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
917         if (err)
918                 goto out;
919         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
920         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
921         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
922
923         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
924         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
925         if (err)
926                 goto out;
927         err = amdgpu_ucode_validate(adev->gfx.me_fw);
928         if (err)
929                 goto out;
930         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
931         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
932         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
933
934         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
935         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
936         if (err)
937                 goto out;
938         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
939         if (err)
940                 goto out;
941         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
942         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
943         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
944
945         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
946         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
947         if (err)
948                 goto out;
949         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
950         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
951         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
952         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
953
954         adev->gfx.rlc.save_and_restore_offset =
955                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
956         adev->gfx.rlc.clear_state_descriptor_offset =
957                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
958         adev->gfx.rlc.avail_scratch_ram_locations =
959                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
960         adev->gfx.rlc.reg_restore_list_size =
961                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
962         adev->gfx.rlc.reg_list_format_start =
963                         le32_to_cpu(rlc_hdr->reg_list_format_start);
964         adev->gfx.rlc.reg_list_format_separate_start =
965                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
966         adev->gfx.rlc.starting_offsets_start =
967                         le32_to_cpu(rlc_hdr->starting_offsets_start);
968         adev->gfx.rlc.reg_list_format_size_bytes =
969                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
970         adev->gfx.rlc.reg_list_size_bytes =
971                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
972
973         adev->gfx.rlc.register_list_format =
974                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
975                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
976
977         if (!adev->gfx.rlc.register_list_format) {
978                 err = -ENOMEM;
979                 goto out;
980         }
981
982         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
983                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
984         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
985                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
986
987         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
988
989         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
990                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
991         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
992                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
993
994         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
995         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
996         if (err)
997                 goto out;
998         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
999         if (err)
1000                 goto out;
1001         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1002         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1003         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1004
1005         if ((adev->asic_type != CHIP_STONEY) &&
1006             (adev->asic_type != CHIP_TOPAZ)) {
1007                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1008                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1009                 if (!err) {
1010                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1011                         if (err)
1012                                 goto out;
1013                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1014                                 adev->gfx.mec2_fw->data;
1015                         adev->gfx.mec2_fw_version =
1016                                 le32_to_cpu(cp_hdr->header.ucode_version);
1017                         adev->gfx.mec2_feature_version =
1018                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1019                 } else {
1020                         err = 0;
1021                         adev->gfx.mec2_fw = NULL;
1022                 }
1023         }
1024
1025         if (adev->firmware.smu_load) {
1026                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1027                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1028                 info->fw = adev->gfx.pfp_fw;
1029                 header = (const struct common_firmware_header *)info->fw->data;
1030                 adev->firmware.fw_size +=
1031                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1032
1033                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1034                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1035                 info->fw = adev->gfx.me_fw;
1036                 header = (const struct common_firmware_header *)info->fw->data;
1037                 adev->firmware.fw_size +=
1038                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1039
1040                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1041                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1042                 info->fw = adev->gfx.ce_fw;
1043                 header = (const struct common_firmware_header *)info->fw->data;
1044                 adev->firmware.fw_size +=
1045                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1046
1047                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1048                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1049                 info->fw = adev->gfx.rlc_fw;
1050                 header = (const struct common_firmware_header *)info->fw->data;
1051                 adev->firmware.fw_size +=
1052                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1053
1054                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1055                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1056                 info->fw = adev->gfx.mec_fw;
1057                 header = (const struct common_firmware_header *)info->fw->data;
1058                 adev->firmware.fw_size +=
1059                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1060
1061                 /* we need account JT in */
1062                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1063                 adev->firmware.fw_size +=
1064                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1065
1066                 if (amdgpu_sriov_vf(adev)) {
1067                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1068                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1069                         info->fw = adev->gfx.mec_fw;
1070                         adev->firmware.fw_size +=
1071                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1072                 }
1073
1074                 if (adev->gfx.mec2_fw) {
1075                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1076                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1077                         info->fw = adev->gfx.mec2_fw;
1078                         header = (const struct common_firmware_header *)info->fw->data;
1079                         adev->firmware.fw_size +=
1080                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1081                 }
1082
1083         }
1084
1085 out:
1086         if (err) {
1087                 dev_err(adev->dev,
1088                         "gfx8: Failed to load firmware \"%s\"\n",
1089                         fw_name);
1090                 release_firmware(adev->gfx.pfp_fw);
1091                 adev->gfx.pfp_fw = NULL;
1092                 release_firmware(adev->gfx.me_fw);
1093                 adev->gfx.me_fw = NULL;
1094                 release_firmware(adev->gfx.ce_fw);
1095                 adev->gfx.ce_fw = NULL;
1096                 release_firmware(adev->gfx.rlc_fw);
1097                 adev->gfx.rlc_fw = NULL;
1098                 release_firmware(adev->gfx.mec_fw);
1099                 adev->gfx.mec_fw = NULL;
1100                 release_firmware(adev->gfx.mec2_fw);
1101                 adev->gfx.mec2_fw = NULL;
1102         }
1103         return err;
1104 }
1105
/**
 * gfx_v8_0_get_csb_buffer - emit the RLC clear-state buffer (CSB) contents
 * @adev: amdgpu device pointer
 * @buffer: destination (typically a kmapped BO; see gfx_v8_0_rlc_init())
 *
 * Writes a PM4 packet stream, dword by dword in little-endian, that
 * brackets the clear-state register programming between PREAMBLE
 * BEGIN/END markers: context control, all SECT_CONTEXT extents from
 * adev->gfx.rlc.cs_data, the raster config pair, and a final CLEAR_STATE.
 * The stream must stay within gfx_v8_0_get_csb_size() dwords.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* Emit every context-register extent; non-context sections abort
	 * the dump early (only SECT_CONTEXT is expected here). */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* Program the harvested raster configuration for SE0/SH0. */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1152
/**
 * cz_init_cp_jump_table - copy the CP jump tables into the RLC cp_table BO
 * @adev: amdgpu device pointer
 *
 * Concatenates the jump table (JT) section of each CP firmware image into
 * the previously mapped cp_table buffer, in engine order:
 * me 0 = CE, 1 = PFP, 2 = ME, 3 = MEC, 4 = MEC2 (Carrizo only).
 *
 * NOTE(review): on Carrizo this dereferences adev->gfx.mec2_fw without a
 * NULL check; gfx_v8_0_init_microcode() leaves mec2_fw NULL when the
 * optional image fails to load — presumably callers only reach this with
 * valid firmware; confirm.
 */
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Carrizo additionally carries a MEC2 jump table. */
	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			/* CE */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			/* PFP */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			/* ME */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			/* MEC */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else  if (me == 4) {
			/* MEC2 (Carrizo only, see max_me above) */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i ++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
1217
1218 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1219 {
1220         int r;
1221
1222         /* clear state block */
1223         if (adev->gfx.rlc.clear_state_obj) {
1224                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1225                 if (unlikely(r != 0))
1226                         dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1227                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1228                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1229                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1230                 adev->gfx.rlc.clear_state_obj = NULL;
1231         }
1232
1233         /* jump table block */
1234         if (adev->gfx.rlc.cp_table_obj) {
1235                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1236                 if (unlikely(r != 0))
1237                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1238                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1239                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1240                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1241                 adev->gfx.rlc.cp_table_obj = NULL;
1242         }
1243 }
1244
1245 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1246 {
1247         volatile u32 *dst_ptr;
1248         u32 dws;
1249         const struct cs_section_def *cs_data;
1250         int r;
1251
1252         adev->gfx.rlc.cs_data = vi_cs_data;
1253
1254         cs_data = adev->gfx.rlc.cs_data;
1255
1256         if (cs_data) {
1257                 /* clear state block */
1258                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1259
1260                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1261                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1262                                              AMDGPU_GEM_DOMAIN_VRAM,
1263                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1264                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1265                                              NULL, NULL,
1266                                              &adev->gfx.rlc.clear_state_obj);
1267                         if (r) {
1268                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1269                                 gfx_v8_0_rlc_fini(adev);
1270                                 return r;
1271                         }
1272                 }
1273                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1274                 if (unlikely(r != 0)) {
1275                         gfx_v8_0_rlc_fini(adev);
1276                         return r;
1277                 }
1278                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1279                                   &adev->gfx.rlc.clear_state_gpu_addr);
1280                 if (r) {
1281                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1282                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1283                         gfx_v8_0_rlc_fini(adev);
1284                         return r;
1285                 }
1286
1287                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1288                 if (r) {
1289                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1290                         gfx_v8_0_rlc_fini(adev);
1291                         return r;
1292                 }
1293                 /* set up the cs buffer */
1294                 dst_ptr = adev->gfx.rlc.cs_ptr;
1295                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1296                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1297                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1298         }
1299
1300         if ((adev->asic_type == CHIP_CARRIZO) ||
1301             (adev->asic_type == CHIP_STONEY)) {
1302                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1303                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1304                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1305                                              AMDGPU_GEM_DOMAIN_VRAM,
1306                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1307                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1308                                              NULL, NULL,
1309                                              &adev->gfx.rlc.cp_table_obj);
1310                         if (r) {
1311                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1312                                 return r;
1313                         }
1314                 }
1315
1316                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1317                 if (unlikely(r != 0)) {
1318                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1319                         return r;
1320                 }
1321                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1322                                   &adev->gfx.rlc.cp_table_gpu_addr);
1323                 if (r) {
1324                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1325                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1326                         return r;
1327                 }
1328                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1329                 if (r) {
1330                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1331                         return r;
1332                 }
1333
1334                 cz_init_cp_jump_table(adev);
1335
1336                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1337                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1338         }
1339
1340         return 0;
1341 }
1342
1343 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1344 {
1345         int r;
1346
1347         if (adev->gfx.mec.hpd_eop_obj) {
1348                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1349                 if (unlikely(r != 0))
1350                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1351                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1352                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1353                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1354                 adev->gfx.mec.hpd_eop_obj = NULL;
1355         }
1356 }
1357
1358 #define MEC_HPD_SIZE 2048
1359
1360 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1361 {
1362         int r;
1363         u32 *hpd;
1364
1365         /*
1366          * we assign only 1 pipe because all other pipes will
1367          * be handled by KFD
1368          */
1369         adev->gfx.mec.num_mec = 1;
1370         adev->gfx.mec.num_pipe = 1;
1371         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1372
1373         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1374                 r = amdgpu_bo_create(adev,
1375                                      adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1376                                      PAGE_SIZE, true,
1377                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1378                                      &adev->gfx.mec.hpd_eop_obj);
1379                 if (r) {
1380                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1381                         return r;
1382                 }
1383         }
1384
1385         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1386         if (unlikely(r != 0)) {
1387                 gfx_v8_0_mec_fini(adev);
1388                 return r;
1389         }
1390         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1391                           &adev->gfx.mec.hpd_eop_gpu_addr);
1392         if (r) {
1393                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1394                 gfx_v8_0_mec_fini(adev);
1395                 return r;
1396         }
1397         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1398         if (r) {
1399                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1400                 gfx_v8_0_mec_fini(adev);
1401                 return r;
1402         }
1403
1404         memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1405
1406         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1407         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1408
1409         return 0;
1410 }
1411
/*
 * Hand-encoded GCN compute shader used by the EDC GPR workaround: a run of
 * register-init instructions that touches a block of VGPRs (presumably a
 * v_mov_b32 chain — confirm against the GCN3 ISA manual), terminated by
 * 0xbf8a0000 (s_barrier) and 0xbf810000 (s_endpgm).  Uploaded into the IB
 * at vgpr_offset by gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1448
/*
 * Hand-encoded GCN compute shader used by the EDC GPR workaround to touch
 * SGPRs (presumably an s_mov_b32 chain — confirm against the GCN3 ISA
 * manual), terminated by s_barrier/s_endpgm plus a pad dword.  Uploaded
 * once at sgpr_offset and dispatched twice (with sgpr1_init_regs and
 * sgpr2_init_regs selecting different SEs) by
 * gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1473
/*
 * Register/value pairs (consumed two at a time via PACKET3_SET_SH_REG)
 * programming the compute dispatch state for the VGPR init shader:
 * all SEs enabled, 256*4 threads in X, and user-data slots filled with
 * recognizable 0xedcedcNN markers.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1493
/*
 * Dispatch state for the first SGPR init pass: thread-management mask 0x0f
 * (lower SE/SH selection vs. sgpr2_init_regs' 0xf0), 256*5 threads in X.
 * Same SET_SH_REG pair layout as vgpr_init_regs.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1513
/*
 * Dispatch state for the second SGPR init pass: identical to
 * sgpr1_init_regs except the thread-management mask is 0xf0, covering the
 * complementary CU selection.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1533
/*
 * EDC SEC/DED error-counter registers across the GFX blocks (CP, DC, GDS,
 * SPI, SQ/SQC, TCC/TCP, TD).  gfx_v8_0_do_edc_gpr_workarounds() reads each
 * one back after the init shaders run to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1562
/**
 * gfx_v8_0_do_edc_gpr_workarounds - initialize GPRs before enabling EDC (CZ)
 * @adev: amdgpu device pointer
 *
 * Builds one indirect buffer containing three compute dispatches — one
 * running the VGPR init shader and two running the SGPR init shader with
 * complementary thread-management masks — submits it on compute ring 0,
 * waits for completion, then enables DED/FED reporting in GB_EDC_MODE and
 * reads back every SEC/DED counter register to clear it.
 *
 * Returns 0 on success or when not applicable (non-Carrizo, ring not
 * ready); negative error code on IB submit/fence failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save the current EDC mode; disable EDC while the shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords per SET_SH_REG reg/value pair, plus the
	 * PGM_LO/HI packet (4), DISPATCH_DIRECT (5) and EVENT_WRITE (2),
	 * all converted from dwords to bytes */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	/* shader start addresses are programmed >> 8, so 256-byte align */
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 — same shader (sgpr_offset again), different SE mask */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED halt mode and FED propagation */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);

	return r;
}
1725
/**
 * gfx_v8_0_gpu_early_init - set per-ASIC GFX configuration limits
 * @adev: amdgpu device pointer
 *
 * Fills adev->gfx.config (shader engines, pipes, CUs, caches, FIFO sizes)
 * from hard-coded per-ASIC tables (or the VBIOS for Polaris), then derives
 * the memory row size from the MC registers and folds it into the golden
 * GB_ADDR_CONFIG value.
 *
 * Returns 0 on success, or an error from amdgpu_atombios_get_gfx_info()
 * on Polaris parts.
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
		/* Polaris reads SE/pipe/CU counts from the VBIOS */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		/* Polaris reads SE/pipe/CU counts from the VBIOS */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count depends on the Carrizo SKU (PCI revision) */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count depends on the Stoney SKU (PCI revision) */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read but never used below —
	 * presumably left over or kept for a planned use; confirm. */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: derive row size from column count, capped at 4KB */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1982
/*
 * gfx_v8_0_sw_init - software-side init for the GFX v8 IP block
 *
 * Registers the GFX interrupt sources (EOP, privileged register and
 * privileged instruction), loads the gfx microcode, allocates the RLC
 * and MEC buffer objects, creates the gfx and compute rings and
 * reserves the GDS/GWS/OA partitions used by gfx.
 *
 * Returns 0 on success, a negative error code on failure.
 * NOTE(review): failure paths return without unwinding what was already
 * set up — presumably the caller invokes sw_fini for cleanup; confirm.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8; /* 8 queues per pipe (ring index mapping below) */
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		/* each pipe has its own EOP interrupt line */
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     irq_type);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000; /* constant engine RAM size in bytes */

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2095
2096 static int gfx_v8_0_sw_fini(void *handle)
2097 {
2098         int i;
2099         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2100
2101         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2102         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2103         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2104
2105         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2106                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2107         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2108                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2109
2110         gfx_v8_0_mec_fini(adev);
2111         gfx_v8_0_rlc_fini(adev);
2112         gfx_v8_0_free_microcode(adev);
2113
2114         return 0;
2115 }
2116
2117 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2118 {
2119         uint32_t *modearray, *mod2array;
2120         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2121         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2122         u32 reg_offset;
2123
2124         modearray = adev->gfx.config.tile_mode_array;
2125         mod2array = adev->gfx.config.macrotile_mode_array;
2126
2127         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2128                 modearray[reg_offset] = 0;
2129
2130         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2131                 mod2array[reg_offset] = 0;
2132
2133         switch (adev->asic_type) {
2134         case CHIP_TOPAZ:
2135                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2136                                 PIPE_CONFIG(ADDR_SURF_P2) |
2137                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2138                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2139                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2140                                 PIPE_CONFIG(ADDR_SURF_P2) |
2141                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2142                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2143                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2144                                 PIPE_CONFIG(ADDR_SURF_P2) |
2145                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2146                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2147                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2148                                 PIPE_CONFIG(ADDR_SURF_P2) |
2149                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2150                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2151                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2152                                 PIPE_CONFIG(ADDR_SURF_P2) |
2153                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2154                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2155                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2156                                 PIPE_CONFIG(ADDR_SURF_P2) |
2157                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2158                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2159                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2160                                 PIPE_CONFIG(ADDR_SURF_P2) |
2161                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2162                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2163                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2164                                 PIPE_CONFIG(ADDR_SURF_P2));
2165                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2166                                 PIPE_CONFIG(ADDR_SURF_P2) |
2167                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2168                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2169                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2170                                  PIPE_CONFIG(ADDR_SURF_P2) |
2171                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2172                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2173                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2174                                  PIPE_CONFIG(ADDR_SURF_P2) |
2175                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2176                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2177                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2178                                  PIPE_CONFIG(ADDR_SURF_P2) |
2179                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2180                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2181                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2182                                  PIPE_CONFIG(ADDR_SURF_P2) |
2183                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2184                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2185                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2186                                  PIPE_CONFIG(ADDR_SURF_P2) |
2187                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2188                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2189                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2190                                  PIPE_CONFIG(ADDR_SURF_P2) |
2191                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2192                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2193                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2194                                  PIPE_CONFIG(ADDR_SURF_P2) |
2195                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2196                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2197                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2198                                  PIPE_CONFIG(ADDR_SURF_P2) |
2199                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2200                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2201                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2202                                  PIPE_CONFIG(ADDR_SURF_P2) |
2203                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2204                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2205                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2206                                  PIPE_CONFIG(ADDR_SURF_P2) |
2207                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2208                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2209                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2210                                  PIPE_CONFIG(ADDR_SURF_P2) |
2211                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2212                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2213                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2214                                  PIPE_CONFIG(ADDR_SURF_P2) |
2215                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2216                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2217                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2218                                  PIPE_CONFIG(ADDR_SURF_P2) |
2219                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2220                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2221                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2222                                  PIPE_CONFIG(ADDR_SURF_P2) |
2223                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2224                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2225                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2226                                  PIPE_CONFIG(ADDR_SURF_P2) |
2227                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2228                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2229                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2230                                  PIPE_CONFIG(ADDR_SURF_P2) |
2231                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2232                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2233                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2234                                  PIPE_CONFIG(ADDR_SURF_P2) |
2235                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2236                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2237
2238                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2239                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2240                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2241                                 NUM_BANKS(ADDR_SURF_8_BANK));
2242                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2243                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2244                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2245                                 NUM_BANKS(ADDR_SURF_8_BANK));
2246                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2247                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2248                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2249                                 NUM_BANKS(ADDR_SURF_8_BANK));
2250                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2251                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2252                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2253                                 NUM_BANKS(ADDR_SURF_8_BANK));
2254                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2255                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2256                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257                                 NUM_BANKS(ADDR_SURF_8_BANK));
2258                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2260                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2261                                 NUM_BANKS(ADDR_SURF_8_BANK));
2262                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2263                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2264                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2265                                 NUM_BANKS(ADDR_SURF_8_BANK));
2266                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2267                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2268                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2269                                 NUM_BANKS(ADDR_SURF_16_BANK));
2270                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2271                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2272                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2273                                 NUM_BANKS(ADDR_SURF_16_BANK));
2274                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2275                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2276                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2277                                  NUM_BANKS(ADDR_SURF_16_BANK));
2278                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2279                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2280                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2281                                  NUM_BANKS(ADDR_SURF_16_BANK));
2282                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2283                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2284                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2285                                  NUM_BANKS(ADDR_SURF_16_BANK));
2286                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2287                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2288                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2289                                  NUM_BANKS(ADDR_SURF_16_BANK));
2290                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2291                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2292                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2293                                  NUM_BANKS(ADDR_SURF_8_BANK));
2294
2295                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2296                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2297                             reg_offset != 23)
2298                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2299
2300                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2301                         if (reg_offset != 7)
2302                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2303
2304                 break;
2305         case CHIP_FIJI:
2306                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2309                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2311                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2313                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2314                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2317                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2318                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2321                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2325                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2326                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2327                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2329                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2330                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2331                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2333                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2334                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2335                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2336                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2337                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2338                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2339                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2340                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2341                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2343                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2345                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2349                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2352                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2353                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2356                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2357                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2360                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2361                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2364                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2365                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2368                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2369                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2372                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2373                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2376                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2377                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2381                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2385                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2387                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2389                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2391                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2393                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2397                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2398                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2399                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2400                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2401                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2403                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2404                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2405                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2407                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2408                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2409                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2411                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2412                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2415                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2419                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2421                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2423                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2424                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2426                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2427                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2428
2429                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2431                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2432                                 NUM_BANKS(ADDR_SURF_8_BANK));
2433                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2435                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2436                                 NUM_BANKS(ADDR_SURF_8_BANK));
2437                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2439                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2440                                 NUM_BANKS(ADDR_SURF_8_BANK));
2441                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2443                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444                                 NUM_BANKS(ADDR_SURF_8_BANK));
2445                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2447                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2448                                 NUM_BANKS(ADDR_SURF_8_BANK));
2449                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2451                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452                                 NUM_BANKS(ADDR_SURF_8_BANK));
2453                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456                                 NUM_BANKS(ADDR_SURF_8_BANK));
2457                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2459                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2460                                 NUM_BANKS(ADDR_SURF_8_BANK));
2461                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2463                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2464                                 NUM_BANKS(ADDR_SURF_8_BANK));
2465                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2467                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468                                  NUM_BANKS(ADDR_SURF_8_BANK));
2469                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472                                  NUM_BANKS(ADDR_SURF_8_BANK));
2473                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2475                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476                                  NUM_BANKS(ADDR_SURF_8_BANK));
2477                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2479                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480                                  NUM_BANKS(ADDR_SURF_8_BANK));
2481                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484                                  NUM_BANKS(ADDR_SURF_4_BANK));
2485
2486                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2487                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2488
2489                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2490                         if (reg_offset != 7)
2491                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2492
2493                 break;
2494         case CHIP_TONGA:
2495                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2498                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2502                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2503                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2506                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2507                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2508                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2510                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2514                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2515                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2516                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2518                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2519                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2520                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2522                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2524                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2525                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2526                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2527                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2528                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2529                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2532                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2536                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2537                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2538                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2540                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2541                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2543                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2544                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2545                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2546                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2548                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2549                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2552                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2554                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2556                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2558                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2560                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2561                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2562                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2563                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2564                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2565                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2566                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2570                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2574                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2576                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2578                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2580                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2582                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2586                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2587                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2588                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2589                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2590                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2592                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2593                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2594                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2596                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2597                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2598                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2600                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2601                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2602                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2604                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2605                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2608                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2609                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2610                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2612                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2613                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2614                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2615                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2616                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2617
2618                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2620                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2621                                 NUM_BANKS(ADDR_SURF_16_BANK));
2622                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2624                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2625                                 NUM_BANKS(ADDR_SURF_16_BANK));
2626                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2628                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2629                                 NUM_BANKS(ADDR_SURF_16_BANK));
2630                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2632                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2633                                 NUM_BANKS(ADDR_SURF_16_BANK));
2634                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2636                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2637                                 NUM_BANKS(ADDR_SURF_16_BANK));
2638                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2640                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2641                                 NUM_BANKS(ADDR_SURF_16_BANK));
2642                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2644                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2645                                 NUM_BANKS(ADDR_SURF_16_BANK));
2646                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2648                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2649                                 NUM_BANKS(ADDR_SURF_16_BANK));
2650                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2652                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2653                                 NUM_BANKS(ADDR_SURF_16_BANK));
2654                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2656                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2657                                  NUM_BANKS(ADDR_SURF_16_BANK));
2658                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2660                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2661                                  NUM_BANKS(ADDR_SURF_16_BANK));
2662                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2663                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2664                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2665                                  NUM_BANKS(ADDR_SURF_8_BANK));
2666                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2668                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2669                                  NUM_BANKS(ADDR_SURF_4_BANK));
2670                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2672                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2673                                  NUM_BANKS(ADDR_SURF_4_BANK));
2674
2675                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2676                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2677
2678                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2679                         if (reg_offset != 7)
2680                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2681
2682                 break;
2683         case CHIP_POLARIS11:
2684                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2686                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2687                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2688                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2690                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2691                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2692                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2693                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2695                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2696                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2697                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2699                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2700                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2701                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2703                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2704                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2705                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2707                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2708                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2709                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2711                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2712                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2714                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2715                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2716                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2717                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2718                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2719                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2721                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2723                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2725                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2729                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2730                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2731                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2733                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2734                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2735                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2736                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2737                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2738                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2739                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2740                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2741                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2743                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2744                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2745                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2747                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2748                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2749                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2750                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2751                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2753                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2754                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2755                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2756                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2757                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2758                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2759                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2760                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2761                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2762                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2763                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2765                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2766                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2767                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2768                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2769                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2770                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2771                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2772                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2773                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2774                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2775                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2777                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2778                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2779                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2780                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2781                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2782                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2783                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2785                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2786                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2787                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2788                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2789                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2790                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2791                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2792                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2793                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2794                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2795                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2796                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2797                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2798                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2799                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2801                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2802                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2803                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2804                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2806
2807                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2809                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2810                                 NUM_BANKS(ADDR_SURF_16_BANK));
2811
2812                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2814                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2815                                 NUM_BANKS(ADDR_SURF_16_BANK));
2816
2817                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820                                 NUM_BANKS(ADDR_SURF_16_BANK));
2821
2822                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2824                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2825                                 NUM_BANKS(ADDR_SURF_16_BANK));
2826
2827                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2828                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2829                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2830                                 NUM_BANKS(ADDR_SURF_16_BANK));
2831
2832                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2834                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2835                                 NUM_BANKS(ADDR_SURF_16_BANK));
2836
2837                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840                                 NUM_BANKS(ADDR_SURF_16_BANK));
2841
2842                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2843                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2844                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2845                                 NUM_BANKS(ADDR_SURF_16_BANK));
2846
2847                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2848                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2849                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2850                                 NUM_BANKS(ADDR_SURF_16_BANK));
2851
2852                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2855                                 NUM_BANKS(ADDR_SURF_16_BANK));
2856
2857                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860                                 NUM_BANKS(ADDR_SURF_16_BANK));
2861
2862                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2863                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2864                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2865                                 NUM_BANKS(ADDR_SURF_16_BANK));
2866
2867                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2868                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2869                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2870                                 NUM_BANKS(ADDR_SURF_8_BANK));
2871
2872                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2874                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2875                                 NUM_BANKS(ADDR_SURF_4_BANK));
2876
2877                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2878                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2879
2880                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2881                         if (reg_offset != 7)
2882                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2883
2884                 break;
	case CHIP_POLARIS10:
		/*
		 * GB_TILE_MODE0..30 table for Polaris10, built on the
		 * 8-pipe (P8_32x32_16x16) pipe configuration.  Entries
		 * 0-7: depth micro-tiling with increasing tile split
		 * (64B..2KB); 8: linear aligned; 9-12: display
		 * micro-tiling; 13-17: thin; 18-26: thick/xthick;
		 * 27-30: rotated micro-tiling.  PRT entries using
		 * ADDR_SURF_P4_16x16 (7, 12, 17, 23, 30) are the
		 * reduced-pipe PRT variants.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14 bank-geometry table.  Index 7 is
		 * deliberately left unset; the write loop below skips it.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program every tile mode register from the table above. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Macrotile mode 7 has no table entry and is not written. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_STONEY:
		/*
		 * GB_TILE_MODE table for Stoney, built on the 2-pipe
		 * (ADDR_SURF_P2) pipe configuration.  Unlike the
		 * discrete-GPU cases, indices 7, 12, 17 and 23 are left
		 * unset here and are skipped by the write loop below.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE bank-geometry table.  Index 7 is left
		 * unset and is skipped by the write loop below.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		/* Tile modes 7, 12, 17 and 23 have no table entry; skip them. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Macrotile mode 7 has no table entry and is not written. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	default:
		dev_warn(adev->dev,
			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
			 adev->asic_type);
		/* fall through — unknown ASICs use the CHIP_CARRIZO tables */

3263         case CHIP_CARRIZO:
3264                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3265                                 PIPE_CONFIG(ADDR_SURF_P2) |
3266                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3267                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3268                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3269                                 PIPE_CONFIG(ADDR_SURF_P2) |
3270                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3271                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3272                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3273                                 PIPE_CONFIG(ADDR_SURF_P2) |
3274                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3275                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3276                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3277                                 PIPE_CONFIG(ADDR_SURF_P2) |
3278                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3279                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3280                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3281                                 PIPE_CONFIG(ADDR_SURF_P2) |
3282                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3283                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3284                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3285                                 PIPE_CONFIG(ADDR_SURF_P2) |
3286                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3287                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3288                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3289                                 PIPE_CONFIG(ADDR_SURF_P2) |
3290                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3291                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3292                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3293                                 PIPE_CONFIG(ADDR_SURF_P2));
3294                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3295                                 PIPE_CONFIG(ADDR_SURF_P2) |
3296                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3297                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3298                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3299                                  PIPE_CONFIG(ADDR_SURF_P2) |
3300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3302                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3303                                  PIPE_CONFIG(ADDR_SURF_P2) |
3304                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3305                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3306                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3307                                  PIPE_CONFIG(ADDR_SURF_P2) |
3308                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3309                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3310                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3311                                  PIPE_CONFIG(ADDR_SURF_P2) |
3312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3314                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3315                                  PIPE_CONFIG(ADDR_SURF_P2) |
3316                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3317                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3318                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3319                                  PIPE_CONFIG(ADDR_SURF_P2) |
3320                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3321                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3322                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3323                                  PIPE_CONFIG(ADDR_SURF_P2) |
3324                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3325                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3326                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3327                                  PIPE_CONFIG(ADDR_SURF_P2) |
3328                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3329                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3330                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3331                                  PIPE_CONFIG(ADDR_SURF_P2) |
3332                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3333                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3334                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3335                                  PIPE_CONFIG(ADDR_SURF_P2) |
3336                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3337                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3338                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3339                                  PIPE_CONFIG(ADDR_SURF_P2) |
3340                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3341                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3342                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3343                                  PIPE_CONFIG(ADDR_SURF_P2) |
3344                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3345                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3346                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3347                                  PIPE_CONFIG(ADDR_SURF_P2) |
3348                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3349                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3350                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3351                                  PIPE_CONFIG(ADDR_SURF_P2) |
3352                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3353                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3354                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3355                                  PIPE_CONFIG(ADDR_SURF_P2) |
3356                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3357                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3358                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3359                                  PIPE_CONFIG(ADDR_SURF_P2) |
3360                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3361                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3362                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3363                                  PIPE_CONFIG(ADDR_SURF_P2) |
3364                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3365                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3366
3367                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3368                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3369                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3370                                 NUM_BANKS(ADDR_SURF_8_BANK));
3371                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3372                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3373                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3374                                 NUM_BANKS(ADDR_SURF_8_BANK));
3375                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3376                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3377                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3378                                 NUM_BANKS(ADDR_SURF_8_BANK));
3379                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3380                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3381                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3382                                 NUM_BANKS(ADDR_SURF_8_BANK));
3383                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3384                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3385                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3386                                 NUM_BANKS(ADDR_SURF_8_BANK));
3387                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3388                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3389                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3390                                 NUM_BANKS(ADDR_SURF_8_BANK));
3391                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3392                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3393                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3394                                 NUM_BANKS(ADDR_SURF_8_BANK));
3395                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3396                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3397                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3398                                 NUM_BANKS(ADDR_SURF_16_BANK));
3399                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3400                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3401                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3402                                 NUM_BANKS(ADDR_SURF_16_BANK));
3403                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3404                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3405                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3406                                  NUM_BANKS(ADDR_SURF_16_BANK));
3407                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3408                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3409                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3410                                  NUM_BANKS(ADDR_SURF_16_BANK));
3411                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3412                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3413                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3414                                  NUM_BANKS(ADDR_SURF_16_BANK));
3415                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3416                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3417                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3418                                  NUM_BANKS(ADDR_SURF_16_BANK));
3419                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3420                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3421                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3422                                  NUM_BANKS(ADDR_SURF_8_BANK));
3423
3424                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3425                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3426                             reg_offset != 23)
3427                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3428
3429                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3430                         if (reg_offset != 7)
3431                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3432
3433                 break;
3434         }
3435 }
3436
3437 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3438                                   u32 se_num, u32 sh_num, u32 instance)
3439 {
3440         u32 data;
3441
3442         if (instance == 0xffffffff)
3443                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3444         else
3445                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3446
3447         if (se_num == 0xffffffff)
3448                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3449         else
3450                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3451
3452         if (sh_num == 0xffffffff)
3453                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3454         else
3455                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3456
3457         WREG32(mmGRBM_GFX_INDEX, data);
3458 }
3459
3460 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3461 {
3462         return (u32)((1ULL << bit_width) - 1);
3463 }
3464
3465 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3466 {
3467         u32 data, mask;
3468
3469         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3470                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3471
3472         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3473
3474         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3475                                        adev->gfx.config.max_sh_per_se);
3476
3477         return (~data) & mask;
3478 }
3479
3480 static void
3481 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3482 {
3483         switch (adev->asic_type) {
3484         case CHIP_FIJI:
3485                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3486                           RB_XSEL2(1) | PKR_MAP(2) |
3487                           PKR_XSEL(1) | PKR_YSEL(1) |
3488                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3489                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3490                            SE_PAIR_YSEL(2);
3491                 break;
3492         case CHIP_TONGA:
3493         case CHIP_POLARIS10:
3494                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3495                           SE_XSEL(1) | SE_YSEL(1);
3496                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3497                            SE_PAIR_YSEL(2);
3498                 break;
3499         case CHIP_TOPAZ:
3500         case CHIP_CARRIZO:
3501                 *rconf |= RB_MAP_PKR0(2);
3502                 *rconf1 |= 0x0;
3503                 break;
3504         case CHIP_POLARIS11:
3505                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3506                           SE_XSEL(1) | SE_YSEL(1);
3507                 *rconf1 |= 0x0;
3508                 break;
3509         case CHIP_STONEY:
3510                 *rconf |= 0x0;
3511                 *rconf1 |= 0x0;
3512                 break;
3513         default:
3514                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3515                 break;
3516         }
3517 }
3518
/*
 * gfx_v8_0_write_harvested_raster_configs - program raster config for a
 * chip with harvested (fused-off) render backends.
 *
 * Patches the SE/PKR/RB mapping fields so that rasterizer work is only
 * routed to render backends present in @rb_mask, then writes the per-SE
 * PA_SC_RASTER_CONFIG/_1 registers.
 *
 * @adev: amdgpu device
 * @raster_config: baseline PA_SC_RASTER_CONFIG for a fully populated chip
 * @raster_config_1: baseline PA_SC_RASTER_CONFIG_1
 * @rb_mask: bitmap of present RBs, SE-major order (rb_per_se bits per SE)
 * @num_rb: total number of RB pipes being configured
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Per-SE slice of rb_mask: SE n owns rb_per_se consecutive bits. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* One SE pair fully harvested: remap SE_PAIR so work goes to the
	 * surviving pair (mapping values per RASTER_CONFIG semantics). */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* One SE of this pair is empty: route all work to the other. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same idea one level down: route around an empty packer. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Finally route around harvested RBs inside each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			/* Second packer of this SE, when present. */
			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3627
/*
 * gfx_v8_0_setup_rb - discover active render backends and program the
 * raster configuration accordingly.
 *
 * Builds the active-RB bitmap from the per-SE/SH disable registers,
 * writes PA_SC_RASTER_CONFIG/_1 (via the harvested path when some RBs
 * are missing), and caches the per-SE/SH register values for reporting
 * to userspace.  Serialized against other GRBM index users by
 * grbm_idx_mutex.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Collect the active-RB bitmap, one SE/SH at a time. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* Back to broadcast mode. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* Full complement of RBs (or none detected): broadcast the default
	 * config.  Otherwise remap around the harvested backends. */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3684
/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMID SH_MEM registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize SH_MEM_CONFIG, the APE1 aperture and SH_MEM_BASES for
 * each compute VMID.
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
/* Compute queues use VMIDs 8..15 (LAST is exclusive). */
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* Same base in the low and high halfwords of SH_MEM_BASES. */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* HSA64 addressing, unaligned access allowed, MTYPE_CC default,
	 * private ATC enabled. */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* Program each compute VMID's banked registers via SRBM select. */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* NOTE(review): base(1) > limit(0) — presumably disables the
		 * APE1 aperture; verify against SH_MEM register docs. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3729
/*
 * gfx_v8_0_gpu_init - one-time GFX block initialization
 *
 * Programs the address config registers, initializes the tiling tables,
 * render backends and CU info, sets up SH_MEM for all 16 VMIDs (plus the
 * compute-VMID apertures) and broadcasts the PA_SC FIFO sizing to all
 * shaders.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 gets MTYPE_UC defaults. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* All other VMIDs get MTYPE_NC defaults. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
3792
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes masters to idle
 *
 * Polls RLC_SERDES_CU_MASTER_BUSY for every SE/SH (selected through the
 * GRBM index), then the SE/GC/TC0/TC1 non-CU master busy bits.  Each
 * poll is bounded by adev->usec_timeout; a timeout is silent (no error
 * is reported to the caller).
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	/* Per-CU master busy state, one SE/SH at a time. */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Then the non-CU masters (SE, GC, TC0, TC1). */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3822
3823 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3824                                                bool enable)
3825 {
3826         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3827
3828         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3829         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3830         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3831         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3832
3833         WREG32(mmCP_INT_CNTL_RING0, tmp);
3834 }
3835
3836 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3837 {
3838         /* csib */
3839         WREG32(mmRLC_CSIB_ADDR_HI,
3840                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3841         WREG32(mmRLC_CSIB_ADDR_LO,
3842                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3843         WREG32(mmRLC_CSIB_LENGTH,
3844                         adev->gfx.rlc.clear_state_size);
3845 }
3846
/*
 * gfx_v8_0_parse_ind_reg_list - pre-process an RLC indirect register list
 *
 * Walks @register_list_format starting at @ind_offset, recording the
 * start offset of each 0xFFFFFFFF-terminated entry in @ind_start_offsets
 * and collecting the distinct index values into @unique_indices.  Each
 * index slot in the list is rewritten in place with its position in
 * @unique_indices.
 *
 * @register_list_format: list to scan; rewritten in place
 * @ind_offset: first element to examine
 * @list_size: number of elements in the list
 * @unique_indices / @indices_count / @max_indices: output set of
 *	distinct index values and its capacity
 * @ind_start_offsets / @offset_count / @max_offset: output entry start
 *	offsets and its capacity
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			/* Bounds-check BEFORE the store: the old BUG_ON ran
			 * after the write (allowing one out-of-bounds store)
			 * and also fired spuriously once the last valid slot
			 * (max_offset - 1) was legitimately used. */
			BUG_ON(*offset_count >= max_offset);
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
		}

		/* 0xFFFFFFFF terminates the current entry. */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* the index value sits two words further on */
		ind_offset += 2;

		/* look for a matching index already recorded */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* same fix: validate capacity before storing */
			BUG_ON(*indices_count >= max_indices);
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
		}

		/* replace the raw index with its position in unique_indices */
		register_list_format[ind_offset] = indices;
	}
}
3896
3897 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3898 {
3899         int i, temp, data;
3900         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3901         int indices_count = 0;
3902         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3903         int offset_count = 0;
3904
3905         int list_size;
3906         unsigned int *register_list_format =
3907                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3908         if (register_list_format == NULL)
3909                 return -ENOMEM;
3910         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3911                         adev->gfx.rlc.reg_list_format_size_bytes);
3912
3913         gfx_v8_0_parse_ind_reg_list(register_list_format,
3914                                 RLC_FormatDirectRegListLength,
3915                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3916                                 unique_indices,
3917                                 &indices_count,
3918                                 sizeof(unique_indices) / sizeof(int),
3919                                 indirect_start_offsets,
3920                                 &offset_count,
3921                                 sizeof(indirect_start_offsets)/sizeof(int));
3922
3923         /* save and restore list */
3924         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3925
3926         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3927         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3928                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3929
3930         /* indirect list */
3931         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3932         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3933                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3934
3935         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3936         list_size = list_size >> 1;
3937         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3938         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3939
3940         /* starting offsets starts */
3941         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3942                 adev->gfx.rlc.starting_offsets_start);
3943         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3944                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3945                                 indirect_start_offsets[i]);
3946
3947         /* unique indices */
3948         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3949         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3950         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3951                 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3952                 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3953         }
3954         kfree(register_list_format);
3955
3956         return 0;
3957 }
3958
/* Turn on the RLC save-restore machine (SRM). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3963
/*
 * gfx_v8_0_init_power_gating - program GFX power-gating delay parameters
 *
 * Only takes effect when static, smart or dynamic GFX power gating is
 * supported.  Sets the RB WPTR poll idle count, the RLC power up/down,
 * command-propagate and memory-sleep delays, the serdes command delay
 * and the GRBM register-save idle threshold.  The values are magic
 * constants preserved from bring-up.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

		data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
		WREG32(mmRLC_PG_DELAY, data);

		WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
		WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
	}
}
3983
3984 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3985                                                 bool enable)
3986 {
3987         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
3988 }
3989
3990 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3991                                                   bool enable)
3992 {
3993         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
3994 }
3995
3996 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3997 {
3998         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0);
3999 }
4000
4001 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4002 {
4003         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4004                               AMD_PG_SUPPORT_GFX_SMG |
4005                               AMD_PG_SUPPORT_GFX_DMG |
4006                               AMD_PG_SUPPORT_CP |
4007                               AMD_PG_SUPPORT_GDS |
4008                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
4009                 gfx_v8_0_init_csb(adev);
4010                 gfx_v8_0_init_save_restore_list(adev);
4011                 gfx_v8_0_enable_save_restore_machine(adev);
4012
4013                 if ((adev->asic_type == CHIP_CARRIZO) ||
4014                     (adev->asic_type == CHIP_STONEY)) {
4015                         WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4016                         gfx_v8_0_init_power_gating(adev);
4017                         WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4018                         if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4019                                 cz_enable_sck_slow_down_on_power_up(adev, true);
4020                                 cz_enable_sck_slow_down_on_power_down(adev, true);
4021                         } else {
4022                                 cz_enable_sck_slow_down_on_power_up(adev, false);
4023                                 cz_enable_sck_slow_down_on_power_down(adev, false);
4024                         }
4025                         if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4026                                 cz_enable_cp_power_gating(adev, true);
4027                         else
4028                                 cz_enable_cp_power_gating(adev, false);
4029                 } else if (adev->asic_type == CHIP_POLARIS11) {
4030                         gfx_v8_0_init_power_gating(adev);
4031                 }
4032         }
4033 }
4034
/*
 * Stop the RLC: clear RLC_ENABLE_F32, mask the GUI idle interrupt and
 * wait for the RLC serdes to quiesce.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4042
/*
 * Soft-reset the RLC by pulsing GRBM_SOFT_RESET.SOFT_RESET_RLC with a
 * 50us hold on both assert and deassert.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4051
/*
 * Start the RLC F32 core.  On APUs the GUI idle interrupt is enabled
 * later, only after the CP has been initialized.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4062
4063 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4064 {
4065         const struct rlc_firmware_header_v2_0 *hdr;
4066         const __le32 *fw_data;
4067         unsigned i, fw_size;
4068
4069         if (!adev->gfx.rlc_fw)
4070                 return -EINVAL;
4071
4072         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4073         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4074
4075         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4076                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4077         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4078
4079         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4080         for (i = 0; i < fw_size; i++)
4081                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4082         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4083
4084         return 0;
4085 }
4086
/*
 * Bring the RLC up: stop it, disable clock gating, disable power
 * gating, reset, re-init PG state, (re)load the microcode if needed
 * and finally start it.  Returns 0 on success or a negative error
 * code if microcode loading fails.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10) {
		/* Polaris also has a 3D CGCG/CGLS control; clear its two
		 * low enable bits as well */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the ucode; just wait for it to finish */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4130
4131 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4132 {
4133         int i;
4134         u32 tmp = RREG32(mmCP_ME_CNTL);
4135
4136         if (enable) {
4137                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4138                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4139                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4140         } else {
4141                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4142                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4143                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4144                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4145                         adev->gfx.gfx_ring[i].ready = false;
4146         }
4147         WREG32(mmCP_ME_CNTL, tmp);
4148         udelay(50);
4149 }
4150
/*
 * Load the gfx CP microcode (PFP, CE, ME) by direct register writes
 * (legacy, non-SMU loading path).  The CP is halted first; each
 * engine's ucode memory is streamed in and the firmware version is
 * left in the address register afterwards.
 *
 * Returns 0 on success, -EINVAL if any of the three firmwares was
 * never fetched.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* halt ME/PFP/CE before touching their ucode memories */
	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME (uses the RAM_WADDR/RAM_DATA pair rather than UCODE_ADDR) */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
4207
4208 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4209 {
4210         u32 count = 0;
4211         const struct cs_section_def *sect = NULL;
4212         const struct cs_extent_def *ext = NULL;
4213
4214         /* begin clear state */
4215         count += 2;
4216         /* context control state */
4217         count += 3;
4218
4219         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4220                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4221                         if (sect->id == SECT_CONTEXT)
4222                                 count += 2 + ext->reg_count;
4223                         else
4224                                 return 0;
4225                 }
4226         }
4227         /* pa_sc_raster_config/pa_sc_raster_config1 */
4228         count += 4;
4229         /* end clear state */
4230         count += 2;
4231         /* clear state */
4232         count += 2;
4233
4234         return count;
4235 }
4236
/*
 * Initialize the gfx CP and emit the clear-state / CE-partition setup
 * stream on gfx ring 0.  The number of dwords written here must match
 * gfx_v8_0_get_csb_size() (plus the 4-dword SET_BASE at the end).
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* stream the golden context register values from vi_cs_data */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC raster configuration (PA_SC_RASTER_CONFIG[1]) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		/* value depends on the single- vs dual-RB config */
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4328
/*
 * Bring up gfx ring 0: program the ring buffer size, pointers,
 * write-back addresses, base address and doorbell, then start the CP
 * and run a ring test.  Returns the ring test result (0 on success).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA is set temporarily so the rptr can be reset */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* restore CNTL without RB_RPTR_WR_ENA */
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base address is programmed in 256-byte units */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		/* Tonga additionally needs the valid doorbell range set */
		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4412
4413 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4414 {
4415         int i;
4416
4417         if (enable) {
4418                 WREG32(mmCP_MEC_CNTL, 0);
4419         } else {
4420                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4421                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4422                         adev->gfx.compute_ring[i].ready = false;
4423         }
4424         udelay(50);
4425 }
4426
/*
 * Load the compute MEC microcode by direct register writes (legacy,
 * non-SMU loading path).  MEC1 is mandatory; MEC2 is loaded only when
 * a separate MEC2 firmware image is present.
 *
 * Returns 0 on success, -EINVAL if the MEC firmware was never fetched.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	/* halt the MECs before touching their ucode memories */
	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4472
4473 struct vi_mqd {
4474         uint32_t header;  /* ordinal0 */
4475         uint32_t compute_dispatch_initiator;  /* ordinal1 */
4476         uint32_t compute_dim_x;  /* ordinal2 */
4477         uint32_t compute_dim_y;  /* ordinal3 */
4478         uint32_t compute_dim_z;  /* ordinal4 */
4479         uint32_t compute_start_x;  /* ordinal5 */
4480         uint32_t compute_start_y;  /* ordinal6 */
4481         uint32_t compute_start_z;  /* ordinal7 */
4482         uint32_t compute_num_thread_x;  /* ordinal8 */
4483         uint32_t compute_num_thread_y;  /* ordinal9 */
4484         uint32_t compute_num_thread_z;  /* ordinal10 */
4485         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4486         uint32_t compute_perfcount_enable;  /* ordinal12 */
4487         uint32_t compute_pgm_lo;  /* ordinal13 */
4488         uint32_t compute_pgm_hi;  /* ordinal14 */
4489         uint32_t compute_tba_lo;  /* ordinal15 */
4490         uint32_t compute_tba_hi;  /* ordinal16 */
4491         uint32_t compute_tma_lo;  /* ordinal17 */
4492         uint32_t compute_tma_hi;  /* ordinal18 */
4493         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4494         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4495         uint32_t compute_vmid;  /* ordinal21 */
4496         uint32_t compute_resource_limits;  /* ordinal22 */
4497         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4498         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4499         uint32_t compute_tmpring_size;  /* ordinal25 */
4500         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4501         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4502         uint32_t compute_restart_x;  /* ordinal28 */
4503         uint32_t compute_restart_y;  /* ordinal29 */
4504         uint32_t compute_restart_z;  /* ordinal30 */
4505         uint32_t compute_thread_trace_enable;  /* ordinal31 */
4506         uint32_t compute_misc_reserved;  /* ordinal32 */
4507         uint32_t compute_dispatch_id;  /* ordinal33 */
4508         uint32_t compute_threadgroup_id;  /* ordinal34 */
4509         uint32_t compute_relaunch;  /* ordinal35 */
4510         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4511         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4512         uint32_t compute_wave_restore_control;  /* ordinal38 */
4513         uint32_t reserved9;  /* ordinal39 */
4514         uint32_t reserved10;  /* ordinal40 */
4515         uint32_t reserved11;  /* ordinal41 */
4516         uint32_t reserved12;  /* ordinal42 */
4517         uint32_t reserved13;  /* ordinal43 */
4518         uint32_t reserved14;  /* ordinal44 */
4519         uint32_t reserved15;  /* ordinal45 */
4520         uint32_t reserved16;  /* ordinal46 */
4521         uint32_t reserved17;  /* ordinal47 */
4522         uint32_t reserved18;  /* ordinal48 */
4523         uint32_t reserved19;  /* ordinal49 */
4524         uint32_t reserved20;  /* ordinal50 */
4525         uint32_t reserved21;  /* ordinal51 */
4526         uint32_t reserved22;  /* ordinal52 */
4527         uint32_t reserved23;  /* ordinal53 */
4528         uint32_t reserved24;  /* ordinal54 */
4529         uint32_t reserved25;  /* ordinal55 */
4530         uint32_t reserved26;  /* ordinal56 */
4531         uint32_t reserved27;  /* ordinal57 */
4532         uint32_t reserved28;  /* ordinal58 */
4533         uint32_t reserved29;  /* ordinal59 */
4534         uint32_t reserved30;  /* ordinal60 */
4535         uint32_t reserved31;  /* ordinal61 */
4536         uint32_t reserved32;  /* ordinal62 */
4537         uint32_t reserved33;  /* ordinal63 */
4538         uint32_t reserved34;  /* ordinal64 */
4539         uint32_t compute_user_data_0;  /* ordinal65 */
4540         uint32_t compute_user_data_1;  /* ordinal66 */
4541         uint32_t compute_user_data_2;  /* ordinal67 */
4542         uint32_t compute_user_data_3;  /* ordinal68 */
4543         uint32_t compute_user_data_4;  /* ordinal69 */
4544         uint32_t compute_user_data_5;  /* ordinal70 */
4545         uint32_t compute_user_data_6;  /* ordinal71 */
4546         uint32_t compute_user_data_7;  /* ordinal72 */
4547         uint32_t compute_user_data_8;  /* ordinal73 */
4548         uint32_t compute_user_data_9;  /* ordinal74 */
4549         uint32_t compute_user_data_10;  /* ordinal75 */
4550         uint32_t compute_user_data_11;  /* ordinal76 */
4551         uint32_t compute_user_data_12;  /* ordinal77 */
4552         uint32_t compute_user_data_13;  /* ordinal78 */
4553         uint32_t compute_user_data_14;  /* ordinal79 */
4554         uint32_t compute_user_data_15;  /* ordinal80 */
4555         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4556         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4557         uint32_t reserved35;  /* ordinal83 */
4558         uint32_t reserved36;  /* ordinal84 */
4559         uint32_t reserved37;  /* ordinal85 */
4560         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4561         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4562         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4563         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4564         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4565         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4566         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4567         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4568         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4569         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4570         uint32_t reserved38;  /* ordinal96 */
4571         uint32_t reserved39;  /* ordinal97 */
4572         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4573         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4574         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4575         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4576         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4577         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4578         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4579         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4580         uint32_t reserved40;  /* ordinal106 */
4581         uint32_t reserved41;  /* ordinal107 */
4582         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4583         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4584         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4585         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4586         uint32_t reserved42;  /* ordinal112 */
4587         uint32_t reserved43;  /* ordinal113 */
4588         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4589         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4590         uint32_t cp_packet_id_lo;  /* ordinal116 */
4591         uint32_t cp_packet_id_hi;  /* ordinal117 */
4592         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4593         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4594         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4595         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4596         uint32_t gds_save_mask_lo;  /* ordinal122 */
4597         uint32_t gds_save_mask_hi;  /* ordinal123 */
4598         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4599         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4600         uint32_t reserved44;  /* ordinal126 */
4601         uint32_t reserved45;  /* ordinal127 */
4602         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4603         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4604         uint32_t cp_hqd_active;  /* ordinal130 */
4605         uint32_t cp_hqd_vmid;  /* ordinal131 */
4606         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4607         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4608         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4609         uint32_t cp_hqd_quantum;  /* ordinal135 */
4610         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4611         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4612         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4613         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4614         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4615         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4616         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4617         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4618         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4619         uint32_t cp_hqd_pq_control;  /* ordinal145 */
4620         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4621         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4622         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4623         uint32_t cp_hqd_ib_control;  /* ordinal149 */
4624         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4625         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4626         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4627         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4628         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4629         uint32_t cp_hqd_msg_type;  /* ordinal155 */
4630         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4631         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4632         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4633         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4634         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4635         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4636         uint32_t cp_mqd_control;  /* ordinal162 */
4637         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4638         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4639         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4640         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4641         uint32_t cp_hqd_eop_control;  /* ordinal167 */
4642         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4643         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4644         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4645         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4646         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4647         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4648         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4649         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4650         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4651         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4652         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4653         uint32_t cp_hqd_error;  /* ordinal179 */
4654         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4655         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4656         uint32_t reserved46;  /* ordinal182 */
4657         uint32_t reserved47;  /* ordinal183 */
4658         uint32_t reserved48;  /* ordinal184 */
4659         uint32_t reserved49;  /* ordinal185 */
4660         uint32_t reserved50;  /* ordinal186 */
4661         uint32_t reserved51;  /* ordinal187 */
4662         uint32_t reserved52;  /* ordinal188 */
4663         uint32_t reserved53;  /* ordinal189 */
4664         uint32_t reserved54;  /* ordinal190 */
4665         uint32_t reserved55;  /* ordinal191 */
4666         uint32_t iqtimer_pkt_header;  /* ordinal192 */
4667         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4668         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4669         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4670         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4671         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4672         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4673         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4674         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4675         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4676         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4677         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4678         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4679         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4680         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4681         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4682         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4683         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4684         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4685         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4686         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4687         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4688         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4689         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4690         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4691         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4692         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4693         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4694         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4695         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4696         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4697         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4698         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4699         uint32_t reserved56;  /* ordinal225 */
4700         uint32_t reserved57;  /* ordinal226 */
4701         uint32_t reserved58;  /* ordinal227 */
4702         uint32_t set_resources_header;  /* ordinal228 */
4703         uint32_t set_resources_dw1;  /* ordinal229 */
4704         uint32_t set_resources_dw2;  /* ordinal230 */
4705         uint32_t set_resources_dw3;  /* ordinal231 */
4706         uint32_t set_resources_dw4;  /* ordinal232 */
4707         uint32_t set_resources_dw5;  /* ordinal233 */
4708         uint32_t set_resources_dw6;  /* ordinal234 */
4709         uint32_t set_resources_dw7;  /* ordinal235 */
4710         uint32_t reserved59;  /* ordinal236 */
4711         uint32_t reserved60;  /* ordinal237 */
4712         uint32_t reserved61;  /* ordinal238 */
4713         uint32_t reserved62;  /* ordinal239 */
4714         uint32_t reserved63;  /* ordinal240 */
4715         uint32_t reserved64;  /* ordinal241 */
4716         uint32_t reserved65;  /* ordinal242 */
4717         uint32_t reserved66;  /* ordinal243 */
4718         uint32_t reserved67;  /* ordinal244 */
4719         uint32_t reserved68;  /* ordinal245 */
4720         uint32_t reserved69;  /* ordinal246 */
4721         uint32_t reserved70;  /* ordinal247 */
4722         uint32_t reserved71;  /* ordinal248 */
4723         uint32_t reserved72;  /* ordinal249 */
4724         uint32_t reserved73;  /* ordinal250 */
4725         uint32_t reserved74;  /* ordinal251 */
4726         uint32_t reserved75;  /* ordinal252 */
4727         uint32_t reserved76;  /* ordinal253 */
4728         uint32_t reserved77;  /* ordinal254 */
4729         uint32_t reserved78;  /* ordinal255 */
4730
4731         uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4732 };
4733
4734 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4735 {
4736         int i, r;
4737
4738         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4739                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4740
4741                 if (ring->mqd_obj) {
4742                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4743                         if (unlikely(r != 0))
4744                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4745
4746                         amdgpu_bo_unpin(ring->mqd_obj);
4747                         amdgpu_bo_unreserve(ring->mqd_obj);
4748
4749                         amdgpu_bo_unref(&ring->mqd_obj);
4750                         ring->mqd_obj = NULL;
4751                 }
4752         }
4753 }
4754
/*
 * Bring up the compute (MEC) command processor queues.
 *
 * Two phases:
 *   1. Program the EOP (end-of-pipe) buffer address/size for every
 *      MEC pipe via the SRBM-selected per-pipe registers.
 *   2. For each compute ring, allocate+pin an MQD (memory queue
 *      descriptor) BO in GTT, fill it in, and mirror every field into
 *      the corresponding CP_HQD_* registers while the ring's
 *      me/pipe/queue is selected through SRBM.
 *
 * Returns 0 on success or a negative errno; on BO errors all MQD BOs
 * allocated so far are released via gfx_v8_0_cp_compute_fini().
 *
 * NOTE: the register programming below is strictly order-dependent
 * (select bank -> disable queue -> program -> activate); do not
 * reorder statements.
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes: program EOP buffers for every pipe of every MEC */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 live on ME 1 (MEC1), 4-7 on ME 2 (MEC2) */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* EOP base is a 256-byte-aligned GPU address, stored >> 8 */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* lazily allocate the MQD BO; may already exist on re-resume */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		/* 0xC0310800: MQD header tag consumed by the CP ucode —
		 * presumably a format/version magic; TODO confirm against
		 * the MEC firmware spec */
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all CUs on every shader engine for this queue */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* capture the EOP addresses programmed in phase 1 above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* busy-wait (up to usec_timeout us) for the drain */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			/* these ASICs need the MEC doorbell aperture
			 * programmed explicitly */
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		/* 0x53 preload size — hardware-recommended value;
		 * presumably from the VI programming guide, confirm */
		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		/* globally enable doorbell processing in the CP */
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* ring-test every compute ring; failures mark the ring unusable
	 * but are not fatal to resume */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
5013
5014 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5015 {
5016         int r;
5017
5018         if (!(adev->flags & AMD_IS_APU))
5019                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5020
5021         if (!adev->pp_enabled) {
5022                 if (!adev->firmware.smu_load) {
5023                         /* legacy firmware loading */
5024                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
5025                         if (r)
5026                                 return r;
5027
5028                         r = gfx_v8_0_cp_compute_load_microcode(adev);
5029                         if (r)
5030                                 return r;
5031                 } else {
5032                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5033                                                         AMDGPU_UCODE_ID_CP_CE);
5034                         if (r)
5035                                 return -EINVAL;
5036
5037                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5038                                                         AMDGPU_UCODE_ID_CP_PFP);
5039                         if (r)
5040                                 return -EINVAL;
5041
5042                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5043                                                         AMDGPU_UCODE_ID_CP_ME);
5044                         if (r)
5045                                 return -EINVAL;
5046
5047                         if (adev->asic_type == CHIP_TOPAZ) {
5048                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
5049                                 if (r)
5050                                         return r;
5051                         } else {
5052                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5053                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
5054                                 if (r)
5055                                         return -EINVAL;
5056                         }
5057                 }
5058         }
5059
5060         r = gfx_v8_0_cp_gfx_resume(adev);
5061         if (r)
5062                 return r;
5063
5064         r = gfx_v8_0_cp_compute_resume(adev);
5065         if (r)
5066                 return r;
5067
5068         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5069
5070         return 0;
5071 }
5072
/* Enable or disable both command processors (GFX first, then compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5078
/*
 * IP-block hw_init hook: apply golden register settings, initialize
 * the GFX core, then bring up the RLC followed by both command
 * processors.  Returns 0 on success or a negative errno.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* RLC must be running before the CPs are resumed */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
5095
/*
 * IP-block hw_fini hook: quiesce interrupt sources, halt both command
 * processors and the RLC, release the compute MQD BOs, and drop GFX
 * powergating.  Teardown order matters; do not reorder.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* disable the privileged-register/instruction fault interrupts */
	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
5111
/* Suspend is a full hardware teardown; hw_fini takes the same handle. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
5118
/* Resume is a full hardware re-init; hw_init takes the same handle. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
5125
5126 static bool gfx_v8_0_is_idle(void *handle)
5127 {
5128         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5129
5130         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5131                 return false;
5132         else
5133                 return true;
5134 }
5135
5136 static int gfx_v8_0_wait_for_idle(void *handle)
5137 {
5138         unsigned i;
5139         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5140
5141         for (i = 0; i < adev->usec_timeout; i++) {
5142                 if (gfx_v8_0_is_idle(handle))
5143                         return 0;
5144
5145                 udelay(1);
5146         }
5147         return -ETIMEDOUT;
5148 }
5149
/*
 * Inspect the GRBM/SRBM status registers and decide whether a soft
 * reset is required.  The required GRBM_SOFT_RESET/SRBM_SOFT_RESET bit
 * masks are cached in adev->gfx.{grbm,srbm}_soft_reset for the
 * pre/soft/post reset hooks to consume.  Returns true when any reset
 * bit was set.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy engine block -> reset CP + GFX + GRBM */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC busy -> reset RLC */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any CP fetcher/compute/gfx unit busy -> reset all three + GRBM */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS: pending GRBM requests or busy semaphore block */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5211
/*
 * If @ring's hardware queue descriptor is active, request a dequeue
 * (DEQUEUE_REQ = 2 — presumably the "drain" request type; confirm
 * against the CP register spec) and busy-wait up to usec_timeout
 * microseconds for the HQD to go inactive.
 *
 * NOTE(review): this selects the ring's SRBM bank but does not restore
 * vi_srbm_select(adev, 0, 0, 0, 0) before returning — the caller
 * appears to be responsible for that; confirm.
 */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int i;

	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		u32 tmp;
		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
				    DEQUEUE_REQ, 2);
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
	}
}
5231
5232 static int gfx_v8_0_pre_soft_reset(void *handle)
5233 {
5234         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5235         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5236
5237         if ((!adev->gfx.grbm_soft_reset) &&
5238             (!adev->gfx.srbm_soft_reset))
5239                 return 0;
5240
5241         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5242         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5243
5244         /* stop the rlc */
5245         gfx_v8_0_rlc_stop(adev);
5246
5247         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5248             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5249                 /* Disable GFX parsing/prefetching */
5250                 gfx_v8_0_cp_gfx_enable(adev, false);
5251
5252         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5253             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5254             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5255             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5256                 int i;
5257
5258                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5259                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5260
5261                         gfx_v8_0_inactive_hqd(adev, ring);
5262                 }
5263                 /* Disable MEC parsing/prefetching */
5264                 gfx_v8_0_cp_compute_enable(adev, false);
5265         }
5266
5267        return 0;
5268 }
5269
/*
 * Apply the GRBM/SRBM soft resets latched by check_soft_reset.
 * Sequence: stall+clear GFX via GMCON_DEBUG, pulse GRBM_SOFT_RESET,
 * pulse SRBM_SOFT_RESET, then release the GMCON stall.  The udelay(50)
 * calls and the read-back of each reset register after writing are
 * part of the reset protocol; do not remove them.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall and clear the GFX pipe in the memory controller */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* assert the reset bits, wait, then deassert */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert/deassert pulse for the SRBM domain */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall/clear */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5331
/*
 * Clear @ring's HQD queue state (dequeue request, read and write
 * pointers) after a soft reset, selecting the ring's SRBM bank for
 * the duration and restoring bank 0 afterwards.
 */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
}
5341
5342 static int gfx_v8_0_post_soft_reset(void *handle)
5343 {
5344         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5345         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5346
5347         if ((!adev->gfx.grbm_soft_reset) &&
5348             (!adev->gfx.srbm_soft_reset))
5349                 return 0;
5350
5351         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5352         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5353
5354         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5355             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5356                 gfx_v8_0_cp_gfx_resume(adev);
5357
5358         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5359             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5360             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5361             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5362                 int i;
5363
5364                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5365                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5366
5367                         gfx_v8_0_init_hqd(adev, ring);
5368                 }
5369                 gfx_v8_0_cp_compute_resume(adev);
5370         }
5371         gfx_v8_0_rlc_start(adev);
5372
5373         return 0;
5374 }
5375
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	/* The mutex serializes capture+read so concurrent callers cannot
	 * interleave between the capture write and the two 32-bit reads.
	 * NOTE(review): this assumes the capture write latches both halves
	 * of the counter so LSB/MSB reads are mutually consistent - confirm
	 * against the RLC register spec.
	 */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
5395
/* Emit WRITE_DATA packets that program the per-VMID GDS, GWS and OA
 * allocations.  Byte-based sizes/offsets are converted to the hardware's
 * granularity first.  Emits 4 packets of 5 dwords each (20 dwords total);
 * the ring must have been sized for this by the caller.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base are packed into one register */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: programmed as a contiguous bitmask of oa_size bits at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5443
/* GFX IP callbacks exposed to the rest of the driver via adev->gfx.funcs. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
};
5448
/* IP-block early init: set ring counts and install the gfx/irq/gds/rlc
 * function tables before any hardware is touched.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5463
/* IP-block late init: enable privileged register/instruction interrupts,
 * run the EDC GPR workaround (which needs the IB pool, hence "late"), and
 * gate GFX power.
 *
 * NOTE(review): if a later step fails, the irqs acquired here are not
 * released on this path - presumably balanced in hw_fini; confirm.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5487
/* Toggle static per-CU (medium-grain) power gating.  On Polaris11 the SMU
 * must be informed first via powerplay before the RLC bit is flipped.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if (adev->asic_type == CHIP_POLARIS11)
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5500
5501 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5502                                                         bool enable)
5503 {
5504         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5505 }
5506
5507 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5508                 bool enable)
5509 {
5510         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5511 }
5512
5513 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5514                                           bool enable)
5515 {
5516         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5517 }
5518
5519 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5520                                                 bool enable)
5521 {
5522         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5523
5524         /* Read any GFX register to wake up GFX. */
5525         if (!enable)
5526                 RREG32(mmDB_RENDER_CONTROL);
5527 }
5528
5529 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5530                                           bool enable)
5531 {
5532         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5533                 cz_enable_gfx_cg_power_gating(adev, true);
5534                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5535                         cz_enable_gfx_pipeline_power_gating(adev, true);
5536         } else {
5537                 cz_enable_gfx_cg_power_gating(adev, false);
5538                 cz_enable_gfx_pipeline_power_gating(adev, false);
5539         }
5540 }
5541
5542 static int gfx_v8_0_set_powergating_state(void *handle,
5543                                           enum amd_powergating_state state)
5544 {
5545         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5546         bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5547
5548         if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5549                 return 0;
5550
5551         switch (adev->asic_type) {
5552         case CHIP_CARRIZO:
5553         case CHIP_STONEY:
5554                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5555                         cz_update_gfx_cg_power_gating(adev, enable);
5556
5557                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5558                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5559                 else
5560                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5561
5562                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5563                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5564                 else
5565                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5566                 break;
5567         case CHIP_POLARIS11:
5568                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5569                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5570                 else
5571                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5572
5573                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5574                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5575                 else
5576                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5577
5578                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5579                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5580                 else
5581                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5582                 break;
5583         default:
5584                 break;
5585         }
5586
5587         return 0;
5588 }
5589
/* Broadcast a BPM command over the RLC serdes to all CUs on all SEs/SHs.
 * @reg_addr: BPM register to target (e.g. BPM_REG_MGCG_OVERRIDE)
 * @cmd:      SET/CLE command value placed in the BPM_DATA field
 *
 * NOTE(review): on Stoney the BPM_DATA and REG_ADDR fields are not
 * cleared before being OR-ed in, unlike on other ASICs - this matches
 * the original code but looks intentional only if those fields are
 * known to be zero; confirm.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* Target every SE/SH so the write reaches all CUs. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5630
/* RLC safe-mode handshake messages and the RLC_GPR_REG2 field layout
 * used by the Carrizo/Stoney path (these fields are not in the generated
 * register headers).
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5637
/* Ask the RLC firmware (via the RLC_GPR_REG2 mailbox) to enter safe mode,
 * then poll until GFX reports clocked/powered and the request bit clears.
 * Skipped entirely when the RLC is not running or no CG/PG feature that
 * needs the handshake is enabled.
 *
 * NOTE(review): "data" starts as the RLC_CNTL value and is then written
 * to RLC_GPR_REG2 with the REQ/MESSAGE bits OR-ed in; the leftover
 * RLC_CNTL bits land in RLC_GPR_REG2 too - confirm this is intentional.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* Wait for GFX to be both clocked and powered up. */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* Wait for the RLC to acknowledge by clearing REQ. */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5673
/* Counterpart of cz_enter_rlc_safe_mode(): send the exit message through
 * RLC_GPR_REG2 and wait for the RLC to clear the request bit.  No-op when
 * the RLC is halted or no relevant CG/PG feature is enabled (though the
 * final REQ poll still runs unconditionally, matching the original flow).
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* Wait for the RLC to acknowledge by clearing REQ. */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
			break;
		udelay(1);
	}
}
5699
/* Enter RLC safe mode via the dedicated RLC_SAFE_MODE register (dGPU
 * variant), then poll until GFX is clocked/powered and the CMD bit clears.
 * Skipped when the RLC is halted or neither CGCG nor MGCG is enabled.
 *
 * NOTE(review): as in the cz_ variant, "data" carries the RLC_CNTL value
 * into the RLC_SAFE_MODE write - confirm the stray bits are harmless.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* Wait for GFX to be both clocked and powered up. */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* Wait for the RLC to acknowledge by clearing CMD. */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5733
/* Leave RLC safe mode via RLC_SAFE_MODE (dGPU variant).  Only issues the
 * exit command when safe mode is actually active; the trailing CMD poll
 * runs unconditionally, matching the original flow.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* MESSAGE cleared = exit request */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* Wait for the RLC to acknowledge by clearing CMD. */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5758
/* No-op safe-mode entry: only tracks the state flag, touches no hardware. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
5763
/* No-op safe-mode exit: only tracks the state flag, touches no hardware. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5768
/* Safe-mode implementation using the RLC_GPR_REG2 mailbox. */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};

/* Safe-mode implementation using the dedicated RLC_SAFE_MODE register. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};

/* Stub implementation for ASICs that need no safe-mode handshake. */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5783
/* Enable or disable medium-grain clock gating (MGCG) plus the related
 * memory light-sleep (MGLS) and CGTS tree-shade features.  The numbered
 * step comments reflect the required hardware programming order; the
 * whole sequence runs under RLC safe mode.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: clear overrides so MGCG can
		 * engage; APUs keep the GRBM override set.
		 */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5887
/* Enable or disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS).  The sequence runs under RLC safe mode and relies
 * on the serdes BPM broadcast to reach every CU; the numbered comments
 * reflect the required programming order.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5978 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5979                                             bool enable)
5980 {
5981         if (enable) {
5982                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5983                  * ===  MGCG + MGLS + TS(CG/LS) ===
5984                  */
5985                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5986                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5987         } else {
5988                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5989                  * ===  CGCG + CGLS ===
5990                  */
5991                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5992                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5993         }
5994         return 0;
5995 }
5996
5997 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5998                                           enum amd_clockgating_state state)
5999 {
6000         uint32_t msg_id, pp_state;
6001         void *pp_handle = adev->powerplay.pp_handle;
6002
6003         if (state == AMD_CG_STATE_UNGATE)
6004                 pp_state = 0;
6005         else
6006                 pp_state = PP_STATE_CG | PP_STATE_LS;
6007
6008         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6009                         PP_BLOCK_GFX_CG,
6010                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6011                         pp_state);
6012         amd_set_clockgating_by_smu(pp_handle, msg_id);
6013
6014         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6015                         PP_BLOCK_GFX_MG,
6016                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6017                         pp_state);
6018         amd_set_clockgating_by_smu(pp_handle, msg_id);
6019
6020         return 0;
6021 }
6022
6023 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6024                                           enum amd_clockgating_state state)
6025 {
6026         uint32_t msg_id, pp_state;
6027         void *pp_handle = adev->powerplay.pp_handle;
6028
6029         if (state == AMD_CG_STATE_UNGATE)
6030                 pp_state = 0;
6031         else
6032                 pp_state = PP_STATE_CG | PP_STATE_LS;
6033
6034         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6035                         PP_BLOCK_GFX_CG,
6036                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6037                         pp_state);
6038         amd_set_clockgating_by_smu(pp_handle, msg_id);
6039
6040         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6041                         PP_BLOCK_GFX_3D,
6042                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6043                         pp_state);
6044         amd_set_clockgating_by_smu(pp_handle, msg_id);
6045
6046         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6047                         PP_BLOCK_GFX_MG,
6048                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6049                         pp_state);
6050         amd_set_clockgating_by_smu(pp_handle, msg_id);
6051
6052         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6053                         PP_BLOCK_GFX_RLC,
6054                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6055                         pp_state);
6056         amd_set_clockgating_by_smu(pp_handle, msg_id);
6057
6058         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6059                         PP_BLOCK_GFX_CP,
6060                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6061                         pp_state);
6062         amd_set_clockgating_by_smu(pp_handle, msg_id);
6063
6064         return 0;
6065 }
6066
6067 static int gfx_v8_0_set_clockgating_state(void *handle,
6068                                           enum amd_clockgating_state state)
6069 {
6070         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6071
6072         switch (adev->asic_type) {
6073         case CHIP_FIJI:
6074         case CHIP_CARRIZO:
6075         case CHIP_STONEY:
6076                 gfx_v8_0_update_gfx_clock_gating(adev,
6077                                                  state == AMD_CG_STATE_GATE ? true : false);
6078                 break;
6079         case CHIP_TONGA:
6080                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6081                 break;
6082         case CHIP_POLARIS10:
6083         case CHIP_POLARIS11:
6084                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6085                 break;
6086         default:
6087                 break;
6088         }
6089         return 0;
6090 }
6091
/* Read pointer comes from the writeback buffer slot for this ring. */
static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}
6096
6097 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6098 {
6099         struct amdgpu_device *adev = ring->adev;
6100
6101         if (ring->use_doorbell)
6102                 /* XXX check if swapping is necessary on BE */
6103                 return ring->adev->wb.wb[ring->wptr_offs];
6104         else
6105                 return RREG32(mmCP_RB0_WPTR);
6106 }
6107
6108 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6109 {
6110         struct amdgpu_device *adev = ring->adev;
6111
6112         if (ring->use_doorbell) {
6113                 /* XXX check if swapping is necessary on BE */
6114                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6115                 WDOORBELL32(ring->doorbell_index, ring->wptr);
6116         } else {
6117                 WREG32(mmCP_RB0_WPTR, ring->wptr);
6118                 (void)RREG32(mmCP_RB0_WPTR);
6119         }
6120 }
6121
/* Emit an HDP flush: a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ
 * and then polls GPU_HDP_FLUSH_DONE until the bit for this CP queue is set.
 * The ref/mask bit selects which CP (gfx or MEC pipe) the flush tracks.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		/* each MEC gets a block of per-pipe DONE bits; shift by pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* only MEC1/MEC2 exist; anything else has no DONE bit */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6153
/* Invalidate the HDP read cache by writing 1 to HDP_DEBUG0 through a
 * confirmed WRITE_DATA packet on the ME engine.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
6165
/* Schedule an indirect buffer on the gfx ring.
 * CE IBs are emitted with INDIRECT_BUFFER_CONST, everything else with
 * plain INDIRECT_BUFFER.  The control word carries the IB length in dwords
 * and the VMID in bits 24+.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  /* swap bit for big-endian hosts */
			  (2 << 0) |
#endif
			  /* IB address must be 4-byte aligned; low bits are flags */
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6188
/* Schedule an indirect buffer on a compute ring.  Same layout as the gfx
 * variant but always uses INDIRECT_BUFFER and sets INDIRECT_BUFFER_VALID
 * in the control word.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				/* swap bit for big-endian hosts */
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6204
/* Emit a fence on the gfx ring: an EVENT_WRITE_EOP that flushes the TC/TCL1
 * caches and then writes @seq (32 or 64 bit per @flags) to @addr, optionally
 * raising an interrupt when AMDGPU_FENCE_FLAG_INT is set.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6225
/* Stall the ring until the fence memory at this ring's fence address
 * reaches the current sync sequence number.  Gfx rings wait on the PFP,
 * compute rings on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6242
/* Flush the TLB for @vm_id from within the command stream:
 *   1) write the new page directory base for the VMID,
 *   2) trigger VM_INVALIDATE_REQUEST for that VMID,
 *   3) poll until the invalidate request bit clears,
 *   4) on gfx rings, sync PFP to ME and pad with NOPs so the CE cannot
 *      touch the VM before the flush completes.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* VMIDs 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
		amdgpu_ring_insert_nop(ring, 128);
	}
}
6291
6292 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6293 {
6294         return ring->adev->wb.wb[ring->wptr_offs];
6295 }
6296
/* Commit a compute ring's write pointer: mirror it in the writeback slot
 * first, then ring the doorbell.  Order matters — the CP reads the wb slot
 * when the doorbell fires.
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
6305
/* Emit a fence on a compute ring using RELEASE_MEM (the MEC equivalent of
 * EVENT_WRITE_EOP): flush TC/TCL1 caches, then write @seq to @addr and
 * optionally raise an interrupt per @flags.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6326
/* Emit a SWITCH_BUFFER packet (used to flip CE/DE buffers). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6332
/* Emit a CONTEXT_CONTROL packet selecting which register/state groups the
 * CP should (re)load.  The dw2 bits below are CONTEXT_CONTROL load-enable
 * fields; see the comments on each group.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6361
6362 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6363                                                  enum amdgpu_interrupt_state state)
6364 {
6365         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6366                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6367 }
6368
6369 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6370                                                      int me, int pipe,
6371                                                      enum amdgpu_interrupt_state state)
6372 {
6373         /*
6374          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6375          * handles the setting of interrupts for this specific pipe. All other
6376          * pipes' interrupts are set by amdkfd.
6377          */
6378
6379         if (me == 1) {
6380                 switch (pipe) {
6381                 case 0:
6382                         break;
6383                 default:
6384                         DRM_DEBUG("invalid pipe %d\n", pipe);
6385                         return;
6386                 }
6387         } else {
6388                 DRM_DEBUG("invalid me %d\n", me);
6389                 return;
6390         }
6391
6392         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6393                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6394 }
6395
6396 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6397                                              struct amdgpu_irq_src *source,
6398                                              unsigned type,
6399                                              enum amdgpu_interrupt_state state)
6400 {
6401         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6402                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6403
6404         return 0;
6405 }
6406
6407 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6408                                               struct amdgpu_irq_src *source,
6409                                               unsigned type,
6410                                               enum amdgpu_interrupt_state state)
6411 {
6412         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6413                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6414
6415         return 0;
6416 }
6417
/* Route an EOP interrupt-state change to the right engine: the gfx ring or
 * one of the eight (MEC, pipe) combinations.  Unknown types are ignored.
 * Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6456
/* EOP interrupt handler: decode which ring fired from the IV ring_id and
 * process its fences.  ring_id layout: bits [3:2] = me, [1:0] = pipe,
 * [6:4] = queue.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		/* me 0 is the gfx engine; only one gfx ring exists */
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		/* me 1/2 are the MECs; find the matching compute ring */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
			  */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6488
/* Handle a privileged register access fault: log it and schedule a GPU
 * reset from process context.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6497
/* Handle a privileged instruction fault: log it and schedule a GPU reset
 * from process context.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6506
/* IP-block lifecycle callbacks for the gfx v8 block (init/fini, suspend/
 * resume, soft reset and clock/power gating entry points).
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6526
/* Ring callbacks for the gfx ring.  emit_frame_size/emit_ib_size are worst
 * case dword counts used to reserve ring space before emission.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
		2 + /* gfx_v8_ring_emit_sb */
		3, /* gfx_v8_ring_emit_cntxcntl */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
};
6558
/* Ring callbacks for the compute (MEC) rings.  Shares most helpers with
 * the gfx ring but uses the doorbell wptr path, RELEASE_MEM fences and no
 * switch-buffer/context-control emission.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6586
6587 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6588 {
6589         int i;
6590
6591         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6592                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6593
6594         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6595                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6596 }
6597
/* IRQ source callbacks for end-of-pipe interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6602
/* IRQ source callbacks for privileged register access faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6607
/* IRQ source callbacks for privileged instruction faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6612
/* Register the gfx IRQ sources (EOP plus the two fault sources) with the
 * device.  The EOP source has one type per CP IRQ; the fault sources are
 * single-type.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
6624
6625 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6626 {
6627         switch (adev->asic_type) {
6628         case CHIP_TOPAZ:
6629                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6630                 break;
6631         case CHIP_STONEY:
6632         case CHIP_CARRIZO:
6633                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6634                 break;
6635         default:
6636                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6637                 break;
6638         }
6639 }
6640
6641 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6642 {
6643         /* init asci gds info */
6644         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6645         adev->gds.gws.total_size = 64;
6646         adev->gds.oa.total_size = 16;
6647
6648         if (adev->gds.mem.total_size == 64 * 1024) {
6649                 adev->gds.mem.gfx_partition_size = 4096;
6650                 adev->gds.mem.cs_partition_size = 4096;
6651
6652                 adev->gds.gws.gfx_partition_size = 4;
6653                 adev->gds.gws.cs_partition_size = 4;
6654
6655                 adev->gds.oa.gfx_partition_size = 4;
6656                 adev->gds.oa.cs_partition_size = 1;
6657         } else {
6658                 adev->gds.mem.gfx_partition_size = 1024;
6659                 adev->gds.mem.cs_partition_size = 1024;
6660
6661                 adev->gds.gws.gfx_partition_size = 16;
6662                 adev->gds.gws.cs_partition_size = 16;
6663
6664                 adev->gds.oa.gfx_partition_size = 4;
6665                 adev->gds.oa.cs_partition_size = 4;
6666         }
6667 }
6668
6669 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6670                                                  u32 bitmap)
6671 {
6672         u32 data;
6673
6674         if (!bitmap)
6675                 return;
6676
6677         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6678         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6679
6680         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6681 }
6682
/* Return the active-CU bitmap for the currently selected shader array:
 * the complement of the combined hardware and user inactive-CU fields,
 * masked to the number of CUs per SH.
 */
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	/* OR the fused-off CUs with the user-disabled CUs */
	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
6694
/* Populate adev->gfx.cu_info: walk every SE/SH, apply any module-parameter
 * CU-disable masks, record the active-CU bitmap per array, count active
 * CUs, and build the "always on" CU mask (first two active CUs of each
 * array).  Takes grbm_idx_mutex because it steers GRBM bank selection.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	/* parse the amdgpu.disable_cu module option (up to 4 SEs x 2 SHs) */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* steer register reads/writes at SE i, SH j */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; first two become "always on" */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore broadcast steering */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
6737
/* gfx IP block descriptor, version 8.0 (Iceland/Tonga/Fiji/Polaris). */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
6746
/* gfx IP block descriptor, version 8.1; shares the 8.0 callbacks. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};