Merge tag 'char-misc-4.10-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregk...
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
/* Number of graphics rings (1) and compute rings (8) for GFX8; the use sites are outside this excerpt. */
54 #define GFX8_NUM_GFX_RINGS     1
55 #define GFX8_NUM_COMPUTE_RINGS 8
56
/*
 * Per-ASIC "golden" values for the GB_ADDR_CONFIG register.  The Tonga and
 * Polaris11 values equal the mmGB_ADDR_CONFIG entries of
 * tonga_golden_common_all and polaris11_golden_common_all; the shared
 * Topaz/Carrizo value 0x22010001 equals the entries in
 * iceland_golden_common_all and cz_golden_common_all.
 */
57 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
59 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
60 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
61
/*
 * Field-placement helpers: each macro shifts x left by the named field's
 * *__SHIFT constant of GB_TILE_MODE0 or GB_MACROTILE_MODE0 (the constants are
 * defined outside this file).  The value is only shifted, not masked, so x
 * must already fit the field width.
 */
62 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
63 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
64 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
65 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
66 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
67 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
68 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
69 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
70 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
71
/* Single-bit masks for RLC_CGTT_MGCG_OVERRIDE, occupying bits 0 through 5. */
72 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
73 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
74 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
76 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
77 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
78
79 /* BPM SERDES CMD: command codes; use sites are outside this excerpt */
80 #define SET_BPM_SERDES_CMD    1
81 #define CLE_BPM_SERDES_CMD    0 /* NOTE(review): "CLE" presumably abbreviates "clear" -- confirm */
82
83 /* BPM Register Address: consecutive values starting at 0 */
84 enum {
85         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
86         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
87         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
88         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
89         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
90         BPM_REG_FGCG_MAX            /* One past the last address above (evaluates to 5) */
91 };
92
/* NOTE(review): presumably the entry count of an RLC "format direct" register list -- confirm at the use site (outside this excerpt). */
93 #define RLC_FormatDirectRegListLength        14
94
/*
 * Firmware image names declared for this module, grouped per ASIC
 * (ce, pfp, me, mec, mec2, rlc).  The Stoney and Topaz groups declare no
 * mec2 image; every other group declares all six.
 */
95 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
101
102 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
107
108 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
114
115 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
120
121 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
127
128 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
133 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
134
135 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
141
142 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
143 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
144 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
145 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
146 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
147 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
148
/*
 * GDS register offsets for VMID 0..15, one initializer per VMID in ascending
 * order.  Each initializer lists that VMID's BASE, SIZE, GWS and OA registers,
 * in that order.
 */
149 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
150 {
151         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
152         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
153         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
154         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
155         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
156         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
157         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
158         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
159         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
160         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
161         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
162         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
163         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
164         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
165         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
166         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
167 };
168
/*
 * Golden register settings for Tonga (A11): a flat u32 list with three words
 * per register.
 * NOTE(review): presumably {register offset, AND mask, OR value} triples for
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
169 static const u32 golden_settings_tonga_a11[] =
170 {
171         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
172         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
173         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
174         mmGB_GPU_ID, 0x0000000f, 0x00000000,
175         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
176         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
177         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
178         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
179         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
180         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
181         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
182         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
183         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
184         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
185         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
186         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
187 };
188
/*
 * Common golden register values for Tonga, three u32 words per register.
 * The mmGB_ADDR_CONFIG value equals TONGA_GB_ADDR_CONFIG_GOLDEN.
 * NOTE(review): presumably {register offset, AND mask, OR value} triples for
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
189 static const u32 tonga_golden_common_all[] =
190 {
191         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
192         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
193         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
194         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
195         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
196         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
197         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
198         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
199 };
200
/*
 * MGCG/CGCG (clock gating) initialization values for Tonga, three u32 words
 * per register.  Layout: the RLC override register, the CGTT_* clock control
 * registers, per-CU CGTS registers for CU0..CU7, then CGTS_SM_CTRL_REG and
 * the CP/RLC control registers.
 * NOTE(review): presumably {register offset, AND mask, OR value} triples for
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
201 static const u32 tonga_mgcg_cgcg_init[] =
202 {
203         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
204         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
205         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
206         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
207         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
208         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
209         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
210         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
211         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
212         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
213         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
214         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
215         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
216         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
217         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
218         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
219         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
220         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
221         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
222         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
223         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
224         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
225         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
226         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
227         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
228         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
229         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
230         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
231         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
232         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
233         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
234         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
235         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
236         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
237         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
238         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
239         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
240         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
241         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
242         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
243         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
244         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
245         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
246         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
247         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
248         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
249         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
250         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
251         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
252         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
253         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
254         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
255         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
256         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
257         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
258         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
259         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
260         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
261         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
262         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
263         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
264         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
265         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
266         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
267         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
268         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
269         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
270         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
271         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
272         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
273         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
274         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
275         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
276         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
277         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
278 };
279
/*
 * Golden register settings for Polaris11 (A11), three u32 words per register.
 * NOTE(review): presumably {register offset, AND mask, OR value} triples for
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
280 static const u32 golden_settings_polaris11_a11[] =
281 {
282         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
283         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
284         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
285         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
286         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
287         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
288         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
289         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
290         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
291         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
292         mmSQ_CONFIG, 0x07f80000, 0x01180000,
293         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
294         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
295         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
296         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
297         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
298         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
299 };
300
/*
 * Common golden register values for Polaris11, three u32 words per register.
 * The mmGB_ADDR_CONFIG value equals POLARIS11_GB_ADDR_CONFIG_GOLDEN.  Unlike
 * the Tonga and Polaris10 tables, this one has no PA_SC_RASTER_CONFIG entries.
 * NOTE(review): presumably {register offset, AND mask, OR value} triples for
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
301 static const u32 polaris11_golden_common_all[] =
302 {
303         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
304         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
305         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
306         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
307         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
308         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
309 };
310
/*
 * Golden register settings for Polaris10 (A11), three u32 words per register.
 * This table has a TCP_CHAN_STEER_HI entry but no TCP_CHAN_STEER_LO entry.
 * NOTE(review): presumably {register offset, AND mask, OR value} triples for
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
311 static const u32 golden_settings_polaris10_a11[] =
312 {
313         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
314         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
315         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
316         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
317         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
318         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
319         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
320         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
321         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
322         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
323         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
324         mmSQ_CONFIG, 0x07f80000, 0x07180000,
325         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
326         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
327         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
328         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
329         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
330 };
331
/*
 * Common golden register values for Polaris10, three u32 words per register.
 * The entries are value-for-value the same as tonga_golden_common_all.
 * NOTE(review): presumably {register offset, AND mask, OR value} triples for
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
332 static const u32 polaris10_golden_common_all[] =
333 {
334         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
335         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
336         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
337         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
338         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
339         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
340         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
341         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
342 };
343
/*
 * Common golden register values for Fiji, three u32 words per register.
 * mmGRBM_GFX_INDEX appears a second time before the trailing
 * SPI_CONFIG_CNTL_1 entry, which the other *_golden_common_all tables lack.
 * NOTE(review): presumably {register offset, AND mask, OR value} triples for
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
344 static const u32 fiji_golden_common_all[] =
345 {
346         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
347         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
348         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
349         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
350         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
351         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
352         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
353         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
354         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
355         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
356 };
357
/*
 * Golden register settings for Fiji (A10), three u32 words per register.
 * NOTE(review): presumably {register offset, AND mask, OR value} triples for
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
358 static const u32 golden_settings_fiji_a10[] =
359 {
360         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
361         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
362         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
363         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
364         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
365         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
366         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
367         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
368         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
369         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
370         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
371 };
372
/*
 * MGCG/CGCG (clock gating) initialization values for Fiji, three u32 words
 * per register.  Unlike the Tonga, Iceland and CZ tables, this one has no
 * per-CU CGTS_CUn_* entries: it goes from the CGTT_* block straight to
 * CGTS_SM_CTRL_REG.
 * NOTE(review): presumably {register offset, AND mask, OR value} triples for
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
373 static const u32 fiji_mgcg_cgcg_init[] =
374 {
375         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
376         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
377         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
378         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
379         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
380         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
381         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
382         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
383         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
384         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
385         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
386         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
387         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
388         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
389         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
390         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
391         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
392         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
393         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
394         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
395         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
396         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
397         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
398         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
399         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
400         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
401         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
402         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
403         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
404         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
405         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
406         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
407         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
408         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
409         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
410 };
411
/*
 * Golden register settings for Iceland (A11), three u32 words per register.
 * gfx_v8_0_init_golden_registers() passes this table to
 * amdgpu_program_register_sequence() in its CHIP_TOPAZ case.
 * NOTE(review): presumably {register offset, AND mask, OR value} triples --
 * confirm against that helper.
 */
412 static const u32 golden_settings_iceland_a11[] =
413 {
414         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
415         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
416         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
417         mmGB_GPU_ID, 0x0000000f, 0x00000000,
418         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
419         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
420         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
421         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
422         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
423         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
424         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
425         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
426         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
427         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
428         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
429         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
430 };
431
/*
 * Common golden register values for Iceland, three u32 words per register.
 * The mmGB_ADDR_CONFIG value equals TOPAZ_GB_ADDR_CONFIG_GOLDEN.
 * NOTE(review): presumably {register offset, AND mask, OR value} triples for
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
432 static const u32 iceland_golden_common_all[] =
433 {
434         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
435         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
436         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
437         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
438         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
439         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
440         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
441         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
442 };
443
/*
 * MGCG/CGCG (clock gating) initialization values for Iceland, three u32 words
 * per register.  gfx_v8_0_init_golden_registers() passes this table to
 * amdgpu_program_register_sequence() in its CHIP_TOPAZ case.
 * Differences from tonga_mgcg_cgcg_init: per-CU CGTS entries cover only
 * CU0..CU5, several CGTT_* and CU*_TA_SQC values differ, and there is no
 * trailing mmCP_MEM_SLP_CNTL entry.
 * NOTE(review): presumably {register offset, AND mask, OR value} triples --
 * confirm against that helper.
 */
444 static const u32 iceland_mgcg_cgcg_init[] =
445 {
446         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
447         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
448         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
451         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
452         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
453         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
454         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
455         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
456         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
457         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
458         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
459         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
460         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
461         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
462         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
463         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
464         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
465         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
466         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
467         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
468         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
469         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
470         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
471         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
472         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
473         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
474         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
475         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
476         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
477         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
478         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
479         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
480         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
481         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
482         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
483         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
484         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
485         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
486         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
487         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
488         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
489         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
490         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
491         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
492         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
493         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
494         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
495         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
496         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
497         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
498         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
499         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
500         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
501         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
502         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
503         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
504         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
505         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
506         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
507         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
508         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
509         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
510 };
511
/*
 * Golden register settings for CZ (A11), three u32 words per register.
 * NOTE(review): "cz" presumably stands for Carrizo, and the entries are
 * presumably {register offset, AND mask, OR value} triples for
 * amdgpu_program_register_sequence() -- confirm both.
 */
512 static const u32 cz_golden_settings_a11[] =
513 {
514         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
515         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
516         mmGB_GPU_ID, 0x0000000f, 0x00000000,
517         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
518         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
519         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
520         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
521         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
522         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
523         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
524         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
525         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
526 };
527
/*
 * Common golden register values for CZ, three u32 words per register.
 * The mmGB_ADDR_CONFIG value equals CARRIZO_GB_ADDR_CONFIG_GOLDEN, and the
 * entries are value-for-value the same as iceland_golden_common_all.
 * NOTE(review): presumably {register offset, AND mask, OR value} triples for
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
528 static const u32 cz_golden_common_all[] =
529 {
530         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
531         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
532         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
533         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
534         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
535         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
536         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
537         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
538 };
539
/*
 * MGCG/CGCG (clock gating) initialization values for CZ, three u32 words per
 * register, with per-CU CGTS entries for CU0..CU7.  Compared with
 * tonga_mgcg_cgcg_init, mmCGTT_CPF_CLK_CTRL is 0x00000100 (Tonga: 0x40000100)
 * and mmRLC_CGCG_CGLS_CTRL is 0x0020003f (Tonga: 0x0020003c).
 * NOTE(review): presumably {register offset, AND mask, OR value} triples for
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
540 static const u32 cz_mgcg_cgcg_init[] =
541 {
542         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
543         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
544         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
545         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
546         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
547         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
548         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
549         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
550         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
551         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
552         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
553         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
554         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
555         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
556         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
557         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
558         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
559         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
560         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
561         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
562         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
563         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
564         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
565         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
566         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
567         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
568         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
569         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
570         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
571         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
572         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
573         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
574         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
575         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
576         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
577         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
578         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
579         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
580         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
581         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
582         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
583         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
584         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
585         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
586         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
587         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
588         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
589         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
590         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
591         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
592         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
593         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
594         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
595         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
596         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
597         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
598         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
599         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
600         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
601         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
602         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
603         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
604         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
605         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
606         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
607         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
608         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
609         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
610         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
611         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
612         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
613         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
614         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
615         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
616         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
617 };
618
/*
 * Golden register settings for Stoney (A11), three u32 words per register.
 * NOTE(review): presumably {register offset, AND mask, OR value} triples for
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
619 static const u32 stoney_golden_settings_a11[] =
620 {
621         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
622         mmGB_GPU_ID, 0x0000000f, 0x00000000,
623         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
624         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
625         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
626         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
627         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
628         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
629         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
630         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
631 };
632
633 static const u32 stoney_golden_common_all[] =
634 {
635         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
636         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
637         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
638         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
639         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
640         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
641         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
642         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
643 };
644
645 static const u32 stoney_mgcg_cgcg_init[] =
646 {
647         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
648         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
649         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
650         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
651         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
652 };
653
654 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
655 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
656 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
657 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
658 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
659 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
660
661 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
662 {
663         switch (adev->asic_type) {
664         case CHIP_TOPAZ:
665                 amdgpu_program_register_sequence(adev,
666                                                  iceland_mgcg_cgcg_init,
667                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
668                 amdgpu_program_register_sequence(adev,
669                                                  golden_settings_iceland_a11,
670                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
671                 amdgpu_program_register_sequence(adev,
672                                                  iceland_golden_common_all,
673                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
674                 break;
675         case CHIP_FIJI:
676                 amdgpu_program_register_sequence(adev,
677                                                  fiji_mgcg_cgcg_init,
678                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
679                 amdgpu_program_register_sequence(adev,
680                                                  golden_settings_fiji_a10,
681                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
682                 amdgpu_program_register_sequence(adev,
683                                                  fiji_golden_common_all,
684                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
685                 break;
686
687         case CHIP_TONGA:
688                 amdgpu_program_register_sequence(adev,
689                                                  tonga_mgcg_cgcg_init,
690                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
691                 amdgpu_program_register_sequence(adev,
692                                                  golden_settings_tonga_a11,
693                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
694                 amdgpu_program_register_sequence(adev,
695                                                  tonga_golden_common_all,
696                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
697                 break;
698         case CHIP_POLARIS11:
699         case CHIP_POLARIS12:
700                 amdgpu_program_register_sequence(adev,
701                                                  golden_settings_polaris11_a11,
702                                                  (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
703                 amdgpu_program_register_sequence(adev,
704                                                  polaris11_golden_common_all,
705                                                  (const u32)ARRAY_SIZE(polaris11_golden_common_all));
706                 break;
707         case CHIP_POLARIS10:
708                 amdgpu_program_register_sequence(adev,
709                                                  golden_settings_polaris10_a11,
710                                                  (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
711                 amdgpu_program_register_sequence(adev,
712                                                  polaris10_golden_common_all,
713                                                  (const u32)ARRAY_SIZE(polaris10_golden_common_all));
714                 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
715                 if (adev->pdev->revision == 0xc7 &&
716                     ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
717                      (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
718                      (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
719                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
720                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
721                 }
722                 break;
723         case CHIP_CARRIZO:
724                 amdgpu_program_register_sequence(adev,
725                                                  cz_mgcg_cgcg_init,
726                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
727                 amdgpu_program_register_sequence(adev,
728                                                  cz_golden_settings_a11,
729                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
730                 amdgpu_program_register_sequence(adev,
731                                                  cz_golden_common_all,
732                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
733                 break;
734         case CHIP_STONEY:
735                 amdgpu_program_register_sequence(adev,
736                                                  stoney_mgcg_cgcg_init,
737                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
738                 amdgpu_program_register_sequence(adev,
739                                                  stoney_golden_settings_a11,
740                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
741                 amdgpu_program_register_sequence(adev,
742                                                  stoney_golden_common_all,
743                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
744                 break;
745         default:
746                 break;
747         }
748 }
749
750 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
751 {
752         int i;
753
754         adev->gfx.scratch.num_reg = 7;
755         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
756         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
757                 adev->gfx.scratch.free[i] = true;
758                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
759         }
760 }
761
762 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
763 {
764         struct amdgpu_device *adev = ring->adev;
765         uint32_t scratch;
766         uint32_t tmp = 0;
767         unsigned i;
768         int r;
769
770         r = amdgpu_gfx_scratch_get(adev, &scratch);
771         if (r) {
772                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
773                 return r;
774         }
775         WREG32(scratch, 0xCAFEDEAD);
776         r = amdgpu_ring_alloc(ring, 3);
777         if (r) {
778                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
779                           ring->idx, r);
780                 amdgpu_gfx_scratch_free(adev, scratch);
781                 return r;
782         }
783         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
784         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
785         amdgpu_ring_write(ring, 0xDEADBEEF);
786         amdgpu_ring_commit(ring);
787
788         for (i = 0; i < adev->usec_timeout; i++) {
789                 tmp = RREG32(scratch);
790                 if (tmp == 0xDEADBEEF)
791                         break;
792                 DRM_UDELAY(1);
793         }
794         if (i < adev->usec_timeout) {
795                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
796                          ring->idx, i);
797         } else {
798                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
799                           ring->idx, scratch, tmp);
800                 r = -EINVAL;
801         }
802         amdgpu_gfx_scratch_free(adev, scratch);
803         return r;
804 }
805
806 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
807 {
808         struct amdgpu_device *adev = ring->adev;
809         struct amdgpu_ib ib;
810         struct dma_fence *f = NULL;
811         uint32_t scratch;
812         uint32_t tmp = 0;
813         long r;
814
815         r = amdgpu_gfx_scratch_get(adev, &scratch);
816         if (r) {
817                 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
818                 return r;
819         }
820         WREG32(scratch, 0xCAFEDEAD);
821         memset(&ib, 0, sizeof(ib));
822         r = amdgpu_ib_get(adev, NULL, 256, &ib);
823         if (r) {
824                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
825                 goto err1;
826         }
827         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
828         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
829         ib.ptr[2] = 0xDEADBEEF;
830         ib.length_dw = 3;
831
832         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
833         if (r)
834                 goto err2;
835
836         r = dma_fence_wait_timeout(f, false, timeout);
837         if (r == 0) {
838                 DRM_ERROR("amdgpu: IB test timed out.\n");
839                 r = -ETIMEDOUT;
840                 goto err2;
841         } else if (r < 0) {
842                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
843                 goto err2;
844         }
845         tmp = RREG32(scratch);
846         if (tmp == 0xDEADBEEF) {
847                 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
848                 r = 0;
849         } else {
850                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
851                           scratch, tmp);
852                 r = -EINVAL;
853         }
854 err2:
855         amdgpu_ib_free(adev, &ib, NULL);
856         dma_fence_put(f);
857 err1:
858         amdgpu_gfx_scratch_free(adev, scratch);
859         return r;
860 }
861
862
863 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
864         release_firmware(adev->gfx.pfp_fw);
865         adev->gfx.pfp_fw = NULL;
866         release_firmware(adev->gfx.me_fw);
867         adev->gfx.me_fw = NULL;
868         release_firmware(adev->gfx.ce_fw);
869         adev->gfx.ce_fw = NULL;
870         release_firmware(adev->gfx.rlc_fw);
871         adev->gfx.rlc_fw = NULL;
872         release_firmware(adev->gfx.mec_fw);
873         adev->gfx.mec_fw = NULL;
874         if ((adev->asic_type != CHIP_STONEY) &&
875             (adev->asic_type != CHIP_TOPAZ))
876                 release_firmware(adev->gfx.mec2_fw);
877         adev->gfx.mec2_fw = NULL;
878
879         kfree(adev->gfx.rlc.register_list_format);
880 }
881
882 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
883 {
884         const char *chip_name;
885         char fw_name[30];
886         int err;
887         struct amdgpu_firmware_info *info = NULL;
888         const struct common_firmware_header *header = NULL;
889         const struct gfx_firmware_header_v1_0 *cp_hdr;
890         const struct rlc_firmware_header_v2_0 *rlc_hdr;
891         unsigned int *tmp = NULL, i;
892
893         DRM_DEBUG("\n");
894
895         switch (adev->asic_type) {
896         case CHIP_TOPAZ:
897                 chip_name = "topaz";
898                 break;
899         case CHIP_TONGA:
900                 chip_name = "tonga";
901                 break;
902         case CHIP_CARRIZO:
903                 chip_name = "carrizo";
904                 break;
905         case CHIP_FIJI:
906                 chip_name = "fiji";
907                 break;
908         case CHIP_POLARIS11:
909                 chip_name = "polaris11";
910                 break;
911         case CHIP_POLARIS10:
912                 chip_name = "polaris10";
913                 break;
914         case CHIP_POLARIS12:
915                 chip_name = "polaris12";
916                 break;
917         case CHIP_STONEY:
918                 chip_name = "stoney";
919                 break;
920         default:
921                 BUG();
922         }
923
924         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
925         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
926         if (err)
927                 goto out;
928         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
929         if (err)
930                 goto out;
931         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
932         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
933         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
934
935         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
936         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
937         if (err)
938                 goto out;
939         err = amdgpu_ucode_validate(adev->gfx.me_fw);
940         if (err)
941                 goto out;
942         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
943         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
944         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
945
946         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
947         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
948         if (err)
949                 goto out;
950         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
951         if (err)
952                 goto out;
953         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
954         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
955         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
956
957         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
958         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
959         if (err)
960                 goto out;
961         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
962         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
963         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
964         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
965
966         adev->gfx.rlc.save_and_restore_offset =
967                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
968         adev->gfx.rlc.clear_state_descriptor_offset =
969                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
970         adev->gfx.rlc.avail_scratch_ram_locations =
971                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
972         adev->gfx.rlc.reg_restore_list_size =
973                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
974         adev->gfx.rlc.reg_list_format_start =
975                         le32_to_cpu(rlc_hdr->reg_list_format_start);
976         adev->gfx.rlc.reg_list_format_separate_start =
977                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
978         adev->gfx.rlc.starting_offsets_start =
979                         le32_to_cpu(rlc_hdr->starting_offsets_start);
980         adev->gfx.rlc.reg_list_format_size_bytes =
981                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
982         adev->gfx.rlc.reg_list_size_bytes =
983                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
984
985         adev->gfx.rlc.register_list_format =
986                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
987                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
988
989         if (!adev->gfx.rlc.register_list_format) {
990                 err = -ENOMEM;
991                 goto out;
992         }
993
994         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
995                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
996         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
997                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
998
999         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1000
1001         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1002                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1003         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1004                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1005
1006         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1007         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1008         if (err)
1009                 goto out;
1010         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1011         if (err)
1012                 goto out;
1013         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1014         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1015         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1016
1017         if ((adev->asic_type != CHIP_STONEY) &&
1018             (adev->asic_type != CHIP_TOPAZ)) {
1019                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1020                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1021                 if (!err) {
1022                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1023                         if (err)
1024                                 goto out;
1025                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1026                                 adev->gfx.mec2_fw->data;
1027                         adev->gfx.mec2_fw_version =
1028                                 le32_to_cpu(cp_hdr->header.ucode_version);
1029                         adev->gfx.mec2_feature_version =
1030                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1031                 } else {
1032                         err = 0;
1033                         adev->gfx.mec2_fw = NULL;
1034                 }
1035         }
1036
1037         if (adev->firmware.smu_load) {
1038                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1039                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1040                 info->fw = adev->gfx.pfp_fw;
1041                 header = (const struct common_firmware_header *)info->fw->data;
1042                 adev->firmware.fw_size +=
1043                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1044
1045                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1046                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1047                 info->fw = adev->gfx.me_fw;
1048                 header = (const struct common_firmware_header *)info->fw->data;
1049                 adev->firmware.fw_size +=
1050                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1051
1052                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1053                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1054                 info->fw = adev->gfx.ce_fw;
1055                 header = (const struct common_firmware_header *)info->fw->data;
1056                 adev->firmware.fw_size +=
1057                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1058
1059                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1060                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1061                 info->fw = adev->gfx.rlc_fw;
1062                 header = (const struct common_firmware_header *)info->fw->data;
1063                 adev->firmware.fw_size +=
1064                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1065
1066                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1067                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1068                 info->fw = adev->gfx.mec_fw;
1069                 header = (const struct common_firmware_header *)info->fw->data;
1070                 adev->firmware.fw_size +=
1071                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1072
1073                 /* we need account JT in */
1074                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1075                 adev->firmware.fw_size +=
1076                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1077
1078                 if (amdgpu_sriov_vf(adev)) {
1079                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1080                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1081                         info->fw = adev->gfx.mec_fw;
1082                         adev->firmware.fw_size +=
1083                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1084                 }
1085
1086                 if (adev->gfx.mec2_fw) {
1087                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1088                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1089                         info->fw = adev->gfx.mec2_fw;
1090                         header = (const struct common_firmware_header *)info->fw->data;
1091                         adev->firmware.fw_size +=
1092                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1093                 }
1094
1095         }
1096
1097 out:
1098         if (err) {
1099                 dev_err(adev->dev,
1100                         "gfx8: Failed to load firmware \"%s\"\n",
1101                         fw_name);
1102                 release_firmware(adev->gfx.pfp_fw);
1103                 adev->gfx.pfp_fw = NULL;
1104                 release_firmware(adev->gfx.me_fw);
1105                 adev->gfx.me_fw = NULL;
1106                 release_firmware(adev->gfx.ce_fw);
1107                 adev->gfx.ce_fw = NULL;
1108                 release_firmware(adev->gfx.rlc_fw);
1109                 adev->gfx.rlc_fw = NULL;
1110                 release_firmware(adev->gfx.mec_fw);
1111                 adev->gfx.mec_fw = NULL;
1112                 release_firmware(adev->gfx.mec2_fw);
1113                 adev->gfx.mec2_fw = NULL;
1114         }
1115         return err;
1116 }
1117
1118 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1119                                     volatile u32 *buffer)
1120 {
1121         u32 count = 0, i;
1122         const struct cs_section_def *sect = NULL;
1123         const struct cs_extent_def *ext = NULL;
1124
1125         if (adev->gfx.rlc.cs_data == NULL)
1126                 return;
1127         if (buffer == NULL)
1128                 return;
1129
1130         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1131         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1132
1133         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1134         buffer[count++] = cpu_to_le32(0x80000000);
1135         buffer[count++] = cpu_to_le32(0x80000000);
1136
1137         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1138                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1139                         if (sect->id == SECT_CONTEXT) {
1140                                 buffer[count++] =
1141                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1142                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1143                                                 PACKET3_SET_CONTEXT_REG_START);
1144                                 for (i = 0; i < ext->reg_count; i++)
1145                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1146                         } else {
1147                                 return;
1148                         }
1149                 }
1150         }
1151
1152         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1153         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1154                         PACKET3_SET_CONTEXT_REG_START);
1155         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1156         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1157
1158         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1159         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1160
1161         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1162         buffer[count++] = cpu_to_le32(0);
1163 }
1164
1165 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1166 {
1167         const __le32 *fw_data;
1168         volatile u32 *dst_ptr;
1169         int me, i, max_me = 4;
1170         u32 bo_offset = 0;
1171         u32 table_offset, table_size;
1172
1173         if (adev->asic_type == CHIP_CARRIZO)
1174                 max_me = 5;
1175
1176         /* write the cp table buffer */
1177         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1178         for (me = 0; me < max_me; me++) {
1179                 if (me == 0) {
1180                         const struct gfx_firmware_header_v1_0 *hdr =
1181                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1182                         fw_data = (const __le32 *)
1183                                 (adev->gfx.ce_fw->data +
1184                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1185                         table_offset = le32_to_cpu(hdr->jt_offset);
1186                         table_size = le32_to_cpu(hdr->jt_size);
1187                 } else if (me == 1) {
1188                         const struct gfx_firmware_header_v1_0 *hdr =
1189                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1190                         fw_data = (const __le32 *)
1191                                 (adev->gfx.pfp_fw->data +
1192                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1193                         table_offset = le32_to_cpu(hdr->jt_offset);
1194                         table_size = le32_to_cpu(hdr->jt_size);
1195                 } else if (me == 2) {
1196                         const struct gfx_firmware_header_v1_0 *hdr =
1197                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1198                         fw_data = (const __le32 *)
1199                                 (adev->gfx.me_fw->data +
1200                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1201                         table_offset = le32_to_cpu(hdr->jt_offset);
1202                         table_size = le32_to_cpu(hdr->jt_size);
1203                 } else if (me == 3) {
1204                         const struct gfx_firmware_header_v1_0 *hdr =
1205                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1206                         fw_data = (const __le32 *)
1207                                 (adev->gfx.mec_fw->data +
1208                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1209                         table_offset = le32_to_cpu(hdr->jt_offset);
1210                         table_size = le32_to_cpu(hdr->jt_size);
1211                 } else  if (me == 4) {
1212                         const struct gfx_firmware_header_v1_0 *hdr =
1213                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1214                         fw_data = (const __le32 *)
1215                                 (adev->gfx.mec2_fw->data +
1216                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1217                         table_offset = le32_to_cpu(hdr->jt_offset);
1218                         table_size = le32_to_cpu(hdr->jt_size);
1219                 }
1220
1221                 for (i = 0; i < table_size; i ++) {
1222                         dst_ptr[bo_offset + i] =
1223                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1224                 }
1225
1226                 bo_offset += table_size;
1227         }
1228 }
1229
1230 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1231 {
1232         int r;
1233
1234         /* clear state block */
1235         if (adev->gfx.rlc.clear_state_obj) {
1236                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1237                 if (unlikely(r != 0))
1238                         dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1239                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1240                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1241                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1242                 adev->gfx.rlc.clear_state_obj = NULL;
1243         }
1244
1245         /* jump table block */
1246         if (adev->gfx.rlc.cp_table_obj) {
1247                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1248                 if (unlikely(r != 0))
1249                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1250                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1251                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1252                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1253                 adev->gfx.rlc.cp_table_obj = NULL;
1254         }
1255 }
1256
1257 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1258 {
1259         volatile u32 *dst_ptr;
1260         u32 dws;
1261         const struct cs_section_def *cs_data;
1262         int r;
1263
1264         adev->gfx.rlc.cs_data = vi_cs_data;
1265
1266         cs_data = adev->gfx.rlc.cs_data;
1267
1268         if (cs_data) {
1269                 /* clear state block */
1270                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1271
1272                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1273                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1274                                              AMDGPU_GEM_DOMAIN_VRAM,
1275                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1276                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1277                                              NULL, NULL,
1278                                              &adev->gfx.rlc.clear_state_obj);
1279                         if (r) {
1280                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1281                                 gfx_v8_0_rlc_fini(adev);
1282                                 return r;
1283                         }
1284                 }
1285                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1286                 if (unlikely(r != 0)) {
1287                         gfx_v8_0_rlc_fini(adev);
1288                         return r;
1289                 }
1290                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1291                                   &adev->gfx.rlc.clear_state_gpu_addr);
1292                 if (r) {
1293                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1294                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1295                         gfx_v8_0_rlc_fini(adev);
1296                         return r;
1297                 }
1298
1299                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1300                 if (r) {
1301                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1302                         gfx_v8_0_rlc_fini(adev);
1303                         return r;
1304                 }
1305                 /* set up the cs buffer */
1306                 dst_ptr = adev->gfx.rlc.cs_ptr;
1307                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1308                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1309                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1310         }
1311
1312         if ((adev->asic_type == CHIP_CARRIZO) ||
1313             (adev->asic_type == CHIP_STONEY)) {
1314                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1315                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1316                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1317                                              AMDGPU_GEM_DOMAIN_VRAM,
1318                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1319                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1320                                              NULL, NULL,
1321                                              &adev->gfx.rlc.cp_table_obj);
1322                         if (r) {
1323                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1324                                 return r;
1325                         }
1326                 }
1327
1328                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1329                 if (unlikely(r != 0)) {
1330                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1331                         return r;
1332                 }
1333                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1334                                   &adev->gfx.rlc.cp_table_gpu_addr);
1335                 if (r) {
1336                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1337                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1338                         return r;
1339                 }
1340                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1341                 if (r) {
1342                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1343                         return r;
1344                 }
1345
1346                 cz_init_cp_jump_table(adev);
1347
1348                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1349                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1350         }
1351
1352         return 0;
1353 }
1354
1355 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1356 {
1357         int r;
1358
1359         if (adev->gfx.mec.hpd_eop_obj) {
1360                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1361                 if (unlikely(r != 0))
1362                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1363                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1364                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1365                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1366                 adev->gfx.mec.hpd_eop_obj = NULL;
1367         }
1368 }
1369
1370 #define MEC_HPD_SIZE 2048
1371
1372 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1373 {
1374         int r;
1375         u32 *hpd;
1376
1377         /*
1378          * we assign only 1 pipe because all other pipes will
1379          * be handled by KFD
1380          */
1381         adev->gfx.mec.num_mec = 1;
1382         adev->gfx.mec.num_pipe = 1;
1383         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1384
1385         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1386                 r = amdgpu_bo_create(adev,
1387                                      adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1388                                      PAGE_SIZE, true,
1389                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1390                                      &adev->gfx.mec.hpd_eop_obj);
1391                 if (r) {
1392                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1393                         return r;
1394                 }
1395         }
1396
1397         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1398         if (unlikely(r != 0)) {
1399                 gfx_v8_0_mec_fini(adev);
1400                 return r;
1401         }
1402         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1403                           &adev->gfx.mec.hpd_eop_gpu_addr);
1404         if (r) {
1405                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1406                 gfx_v8_0_mec_fini(adev);
1407                 return r;
1408         }
1409         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1410         if (r) {
1411                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1412                 gfx_v8_0_mec_fini(adev);
1413                 return r;
1414         }
1415
1416         memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1417
1418         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1419         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1420
1421         return 0;
1422 }
1423
1424 static const u32 vgpr_init_compute_shader[] =
1425 {
1426         0x7e000209, 0x7e020208,
1427         0x7e040207, 0x7e060206,
1428         0x7e080205, 0x7e0a0204,
1429         0x7e0c0203, 0x7e0e0202,
1430         0x7e100201, 0x7e120200,
1431         0x7e140209, 0x7e160208,
1432         0x7e180207, 0x7e1a0206,
1433         0x7e1c0205, 0x7e1e0204,
1434         0x7e200203, 0x7e220202,
1435         0x7e240201, 0x7e260200,
1436         0x7e280209, 0x7e2a0208,
1437         0x7e2c0207, 0x7e2e0206,
1438         0x7e300205, 0x7e320204,
1439         0x7e340203, 0x7e360202,
1440         0x7e380201, 0x7e3a0200,
1441         0x7e3c0209, 0x7e3e0208,
1442         0x7e400207, 0x7e420206,
1443         0x7e440205, 0x7e460204,
1444         0x7e480203, 0x7e4a0202,
1445         0x7e4c0201, 0x7e4e0200,
1446         0x7e500209, 0x7e520208,
1447         0x7e540207, 0x7e560206,
1448         0x7e580205, 0x7e5a0204,
1449         0x7e5c0203, 0x7e5e0202,
1450         0x7e600201, 0x7e620200,
1451         0x7e640209, 0x7e660208,
1452         0x7e680207, 0x7e6a0206,
1453         0x7e6c0205, 0x7e6e0204,
1454         0x7e700203, 0x7e720202,
1455         0x7e740201, 0x7e760200,
1456         0x7e780209, 0x7e7a0208,
1457         0x7e7c0207, 0x7e7e0206,
1458         0xbf8a0000, 0xbf810000,
1459 };
1460
1461 static const u32 sgpr_init_compute_shader[] =
1462 {
1463         0xbe8a0100, 0xbe8c0102,
1464         0xbe8e0104, 0xbe900106,
1465         0xbe920108, 0xbe940100,
1466         0xbe960102, 0xbe980104,
1467         0xbe9a0106, 0xbe9c0108,
1468         0xbe9e0100, 0xbea00102,
1469         0xbea20104, 0xbea40106,
1470         0xbea60108, 0xbea80100,
1471         0xbeaa0102, 0xbeac0104,
1472         0xbeae0106, 0xbeb00108,
1473         0xbeb20100, 0xbeb40102,
1474         0xbeb60104, 0xbeb80106,
1475         0xbeba0108, 0xbebc0100,
1476         0xbebe0102, 0xbec00104,
1477         0xbec20106, 0xbec40108,
1478         0xbec60100, 0xbec80102,
1479         0xbee60004, 0xbee70005,
1480         0xbeea0006, 0xbeeb0007,
1481         0xbee80008, 0xbee90009,
1482         0xbefc0000, 0xbf8a0000,
1483         0xbf810000, 0x00000000,
1484 };
1485
1486 static const u32 vgpr_init_regs[] =
1487 {
1488         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1489         mmCOMPUTE_RESOURCE_LIMITS, 0,
1490         mmCOMPUTE_NUM_THREAD_X, 256*4,
1491         mmCOMPUTE_NUM_THREAD_Y, 1,
1492         mmCOMPUTE_NUM_THREAD_Z, 1,
1493         mmCOMPUTE_PGM_RSRC2, 20,
1494         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1495         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1496         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1497         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1498         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1499         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1500         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1501         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1502         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1503         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1504 };
1505
1506 static const u32 sgpr1_init_regs[] =
1507 {
1508         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1509         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1510         mmCOMPUTE_NUM_THREAD_X, 256*5,
1511         mmCOMPUTE_NUM_THREAD_Y, 1,
1512         mmCOMPUTE_NUM_THREAD_Z, 1,
1513         mmCOMPUTE_PGM_RSRC2, 20,
1514         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1515         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1516         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1517         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1518         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1519         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1520         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1521         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1522         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1523         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1524 };
1525
1526 static const u32 sgpr2_init_regs[] =
1527 {
1528         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1529         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1530         mmCOMPUTE_NUM_THREAD_X, 256*5,
1531         mmCOMPUTE_NUM_THREAD_Y, 1,
1532         mmCOMPUTE_NUM_THREAD_Z, 1,
1533         mmCOMPUTE_PGM_RSRC2, 20,
1534         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1535         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1536         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1537         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1538         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1539         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1540         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1541         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1542         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1543         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1544 };
1545
1546 static const u32 sec_ded_counter_registers[] =
1547 {
1548         mmCPC_EDC_ATC_CNT,
1549         mmCPC_EDC_SCRATCH_CNT,
1550         mmCPC_EDC_UCODE_CNT,
1551         mmCPF_EDC_ATC_CNT,
1552         mmCPF_EDC_ROQ_CNT,
1553         mmCPF_EDC_TAG_CNT,
1554         mmCPG_EDC_ATC_CNT,
1555         mmCPG_EDC_DMA_CNT,
1556         mmCPG_EDC_TAG_CNT,
1557         mmDC_EDC_CSINVOC_CNT,
1558         mmDC_EDC_RESTORE_CNT,
1559         mmDC_EDC_STATE_CNT,
1560         mmGDS_EDC_CNT,
1561         mmGDS_EDC_GRBM_CNT,
1562         mmGDS_EDC_OA_DED,
1563         mmSPI_EDC_CNT,
1564         mmSQC_ATC_EDC_GATCL1_CNT,
1565         mmSQC_EDC_CNT,
1566         mmSQ_EDC_DED_CNT,
1567         mmSQ_EDC_INFO,
1568         mmSQ_EDC_SEC_CNT,
1569         mmTCC_EDC_CNT,
1570         mmTCP_ATC_EDC_GATCL1_CNT,
1571         mmTCP_EDC_CNT,
1572         mmTD_EDC_CNT
1573 };
1574
1575 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1576 {
1577         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1578         struct amdgpu_ib ib;
1579         struct dma_fence *f = NULL;
1580         int r, i;
1581         u32 tmp;
1582         unsigned total_size, vgpr_offset, sgpr_offset;
1583         u64 gpu_addr;
1584
1585         /* only supported on CZ */
1586         if (adev->asic_type != CHIP_CARRIZO)
1587                 return 0;
1588
1589         /* bail if the compute ring is not ready */
1590         if (!ring->ready)
1591                 return 0;
1592
1593         tmp = RREG32(mmGB_EDC_MODE);
1594         WREG32(mmGB_EDC_MODE, 0);
1595
1596         total_size =
1597                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1598         total_size +=
1599                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1600         total_size +=
1601                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1602         total_size = ALIGN(total_size, 256);
1603         vgpr_offset = total_size;
1604         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1605         sgpr_offset = total_size;
1606         total_size += sizeof(sgpr_init_compute_shader);
1607
1608         /* allocate an indirect buffer to put the commands in */
1609         memset(&ib, 0, sizeof(ib));
1610         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1611         if (r) {
1612                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1613                 return r;
1614         }
1615
1616         /* load the compute shaders */
1617         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1618                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1619
1620         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1621                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1622
1623         /* init the ib length to 0 */
1624         ib.length_dw = 0;
1625
1626         /* VGPR */
1627         /* write the register state for the compute dispatch */
1628         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1629                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1630                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1631                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1632         }
1633         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1634         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1635         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1636         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1637         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1638         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1639
1640         /* write dispatch packet */
1641         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1642         ib.ptr[ib.length_dw++] = 8; /* x */
1643         ib.ptr[ib.length_dw++] = 1; /* y */
1644         ib.ptr[ib.length_dw++] = 1; /* z */
1645         ib.ptr[ib.length_dw++] =
1646                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1647
1648         /* write CS partial flush packet */
1649         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1650         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1651
1652         /* SGPR1 */
1653         /* write the register state for the compute dispatch */
1654         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1655                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1656                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1657                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1658         }
1659         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1660         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1661         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1662         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1663         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1664         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1665
1666         /* write dispatch packet */
1667         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1668         ib.ptr[ib.length_dw++] = 8; /* x */
1669         ib.ptr[ib.length_dw++] = 1; /* y */
1670         ib.ptr[ib.length_dw++] = 1; /* z */
1671         ib.ptr[ib.length_dw++] =
1672                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1673
1674         /* write CS partial flush packet */
1675         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1676         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1677
1678         /* SGPR2 */
1679         /* write the register state for the compute dispatch */
1680         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1681                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1682                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1683                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1684         }
1685         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1686         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1687         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1688         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1689         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1690         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1691
1692         /* write dispatch packet */
1693         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1694         ib.ptr[ib.length_dw++] = 8; /* x */
1695         ib.ptr[ib.length_dw++] = 1; /* y */
1696         ib.ptr[ib.length_dw++] = 1; /* z */
1697         ib.ptr[ib.length_dw++] =
1698                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1699
1700         /* write CS partial flush packet */
1701         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1702         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1703
1704         /* shedule the ib on the ring */
1705         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1706         if (r) {
1707                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1708                 goto fail;
1709         }
1710
1711         /* wait for the GPU to finish processing the IB */
1712         r = dma_fence_wait(f, false);
1713         if (r) {
1714                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1715                 goto fail;
1716         }
1717
1718         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1719         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1720         WREG32(mmGB_EDC_MODE, tmp);
1721
1722         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1723         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1724         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1725
1726
1727         /* read back registers to clear the counters */
1728         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1729                 RREG32(sec_ded_counter_registers[i]);
1730
1731 fail:
1732         amdgpu_ib_free(adev, &ib, NULL);
1733         dma_fence_put(f);
1734
1735         return r;
1736 }
1737
1738 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1739 {
1740         u32 gb_addr_config;
1741         u32 mc_shared_chmap, mc_arb_ramcfg;
1742         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1743         u32 tmp;
1744         int ret;
1745
1746         switch (adev->asic_type) {
1747         case CHIP_TOPAZ:
1748                 adev->gfx.config.max_shader_engines = 1;
1749                 adev->gfx.config.max_tile_pipes = 2;
1750                 adev->gfx.config.max_cu_per_sh = 6;
1751                 adev->gfx.config.max_sh_per_se = 1;
1752                 adev->gfx.config.max_backends_per_se = 2;
1753                 adev->gfx.config.max_texture_channel_caches = 2;
1754                 adev->gfx.config.max_gprs = 256;
1755                 adev->gfx.config.max_gs_threads = 32;
1756                 adev->gfx.config.max_hw_contexts = 8;
1757
1758                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1759                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1760                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1761                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1762                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1763                 break;
1764         case CHIP_FIJI:
1765                 adev->gfx.config.max_shader_engines = 4;
1766                 adev->gfx.config.max_tile_pipes = 16;
1767                 adev->gfx.config.max_cu_per_sh = 16;
1768                 adev->gfx.config.max_sh_per_se = 1;
1769                 adev->gfx.config.max_backends_per_se = 4;
1770                 adev->gfx.config.max_texture_channel_caches = 16;
1771                 adev->gfx.config.max_gprs = 256;
1772                 adev->gfx.config.max_gs_threads = 32;
1773                 adev->gfx.config.max_hw_contexts = 8;
1774
1775                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1776                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1777                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1778                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1779                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1780                 break;
1781         case CHIP_POLARIS11:
1782         case CHIP_POLARIS12:
1783                 ret = amdgpu_atombios_get_gfx_info(adev);
1784                 if (ret)
1785                         return ret;
1786                 adev->gfx.config.max_gprs = 256;
1787                 adev->gfx.config.max_gs_threads = 32;
1788                 adev->gfx.config.max_hw_contexts = 8;
1789
1790                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1791                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1792                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1793                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1794                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1795                 break;
1796         case CHIP_POLARIS10:
1797                 ret = amdgpu_atombios_get_gfx_info(adev);
1798                 if (ret)
1799                         return ret;
1800                 adev->gfx.config.max_gprs = 256;
1801                 adev->gfx.config.max_gs_threads = 32;
1802                 adev->gfx.config.max_hw_contexts = 8;
1803
1804                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1805                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1806                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1807                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1808                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1809                 break;
1810         case CHIP_TONGA:
1811                 adev->gfx.config.max_shader_engines = 4;
1812                 adev->gfx.config.max_tile_pipes = 8;
1813                 adev->gfx.config.max_cu_per_sh = 8;
1814                 adev->gfx.config.max_sh_per_se = 1;
1815                 adev->gfx.config.max_backends_per_se = 2;
1816                 adev->gfx.config.max_texture_channel_caches = 8;
1817                 adev->gfx.config.max_gprs = 256;
1818                 adev->gfx.config.max_gs_threads = 32;
1819                 adev->gfx.config.max_hw_contexts = 8;
1820
1821                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1822                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1823                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1824                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1825                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1826                 break;
1827         case CHIP_CARRIZO:
1828                 adev->gfx.config.max_shader_engines = 1;
1829                 adev->gfx.config.max_tile_pipes = 2;
1830                 adev->gfx.config.max_sh_per_se = 1;
1831                 adev->gfx.config.max_backends_per_se = 2;
1832
1833                 switch (adev->pdev->revision) {
1834                 case 0xc4:
1835                 case 0x84:
1836                 case 0xc8:
1837                 case 0xcc:
1838                 case 0xe1:
1839                 case 0xe3:
1840                         /* B10 */
1841                         adev->gfx.config.max_cu_per_sh = 8;
1842                         break;
1843                 case 0xc5:
1844                 case 0x81:
1845                 case 0x85:
1846                 case 0xc9:
1847                 case 0xcd:
1848                 case 0xe2:
1849                 case 0xe4:
1850                         /* B8 */
1851                         adev->gfx.config.max_cu_per_sh = 6;
1852                         break;
1853                 case 0xc6:
1854                 case 0xca:
1855                 case 0xce:
1856                 case 0x88:
1857                         /* B6 */
1858                         adev->gfx.config.max_cu_per_sh = 6;
1859                         break;
1860                 case 0xc7:
1861                 case 0x87:
1862                 case 0xcb:
1863                 case 0xe5:
1864                 case 0x89:
1865                 default:
1866                         /* B4 */
1867                         adev->gfx.config.max_cu_per_sh = 4;
1868                         break;
1869                 }
1870
1871                 adev->gfx.config.max_texture_channel_caches = 2;
1872                 adev->gfx.config.max_gprs = 256;
1873                 adev->gfx.config.max_gs_threads = 32;
1874                 adev->gfx.config.max_hw_contexts = 8;
1875
1876                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1877                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1878                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1879                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1880                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1881                 break;
1882         case CHIP_STONEY:
1883                 adev->gfx.config.max_shader_engines = 1;
1884                 adev->gfx.config.max_tile_pipes = 2;
1885                 adev->gfx.config.max_sh_per_se = 1;
1886                 adev->gfx.config.max_backends_per_se = 1;
1887
1888                 switch (adev->pdev->revision) {
1889                 case 0xc0:
1890                 case 0xc1:
1891                 case 0xc2:
1892                 case 0xc4:
1893                 case 0xc8:
1894                 case 0xc9:
1895                         adev->gfx.config.max_cu_per_sh = 3;
1896                         break;
1897                 case 0xd0:
1898                 case 0xd1:
1899                 case 0xd2:
1900                 default:
1901                         adev->gfx.config.max_cu_per_sh = 2;
1902                         break;
1903                 }
1904
1905                 adev->gfx.config.max_texture_channel_caches = 2;
1906                 adev->gfx.config.max_gprs = 256;
1907                 adev->gfx.config.max_gs_threads = 16;
1908                 adev->gfx.config.max_hw_contexts = 8;
1909
1910                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1911                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1912                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1913                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1914                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1915                 break;
1916         default:
1917                 adev->gfx.config.max_shader_engines = 2;
1918                 adev->gfx.config.max_tile_pipes = 4;
1919                 adev->gfx.config.max_cu_per_sh = 2;
1920                 adev->gfx.config.max_sh_per_se = 1;
1921                 adev->gfx.config.max_backends_per_se = 2;
1922                 adev->gfx.config.max_texture_channel_caches = 4;
1923                 adev->gfx.config.max_gprs = 256;
1924                 adev->gfx.config.max_gs_threads = 32;
1925                 adev->gfx.config.max_hw_contexts = 8;
1926
1927                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1928                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1929                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1930                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1931                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1932                 break;
1933         }
1934
1935         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1936         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1937         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1938
1939         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1940         adev->gfx.config.mem_max_burst_length_bytes = 256;
1941         if (adev->flags & AMD_IS_APU) {
1942                 /* Get memory bank mapping mode. */
1943                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1944                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1945                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1946
1947                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1948                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1949                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1950
1951                 /* Validate settings in case only one DIMM installed. */
1952                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1953                         dimm00_addr_map = 0;
1954                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1955                         dimm01_addr_map = 0;
1956                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1957                         dimm10_addr_map = 0;
1958                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1959                         dimm11_addr_map = 0;
1960
1961                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1962                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1963                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1964                         adev->gfx.config.mem_row_size_in_kb = 2;
1965                 else
1966                         adev->gfx.config.mem_row_size_in_kb = 1;
1967         } else {
1968                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1969                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1970                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1971                         adev->gfx.config.mem_row_size_in_kb = 4;
1972         }
1973
1974         adev->gfx.config.shader_engine_tile_size = 32;
1975         adev->gfx.config.num_gpus = 1;
1976         adev->gfx.config.multi_gpu_tile_size = 64;
1977
1978         /* fix up row size */
1979         switch (adev->gfx.config.mem_row_size_in_kb) {
1980         case 1:
1981         default:
1982                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1983                 break;
1984         case 2:
1985                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1986                 break;
1987         case 4:
1988                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1989                 break;
1990         }
1991         adev->gfx.config.gb_addr_config = gb_addr_config;
1992
1993         return 0;
1994 }
1995
1996 static int gfx_v8_0_sw_init(void *handle)
1997 {
1998         int i, r;
1999         struct amdgpu_ring *ring;
2000         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2001
2002         /* EOP Event */
2003         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
2004         if (r)
2005                 return r;
2006
2007         /* Privileged reg */
2008         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
2009         if (r)
2010                 return r;
2011
2012         /* Privileged inst */
2013         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2014         if (r)
2015                 return r;
2016
2017         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2018
2019         gfx_v8_0_scratch_init(adev);
2020
2021         r = gfx_v8_0_init_microcode(adev);
2022         if (r) {
2023                 DRM_ERROR("Failed to load gfx firmware!\n");
2024                 return r;
2025         }
2026
2027         r = gfx_v8_0_rlc_init(adev);
2028         if (r) {
2029                 DRM_ERROR("Failed to init rlc BOs!\n");
2030                 return r;
2031         }
2032
2033         r = gfx_v8_0_mec_init(adev);
2034         if (r) {
2035                 DRM_ERROR("Failed to init MEC BOs!\n");
2036                 return r;
2037         }
2038
2039         /* set up the gfx ring */
2040         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2041                 ring = &adev->gfx.gfx_ring[i];
2042                 ring->ring_obj = NULL;
2043                 sprintf(ring->name, "gfx");
2044                 /* no gfx doorbells on iceland */
2045                 if (adev->asic_type != CHIP_TOPAZ) {
2046                         ring->use_doorbell = true;
2047                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2048                 }
2049
2050                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2051                                      AMDGPU_CP_IRQ_GFX_EOP);
2052                 if (r)
2053                         return r;
2054         }
2055
2056         /* set up the compute queues */
2057         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2058                 unsigned irq_type;
2059
2060                 /* max 32 queues per MEC */
2061                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2062                         DRM_ERROR("Too many (%d) compute rings!\n", i);
2063                         break;
2064                 }
2065                 ring = &adev->gfx.compute_ring[i];
2066                 ring->ring_obj = NULL;
2067                 ring->use_doorbell = true;
2068                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2069                 ring->me = 1; /* first MEC */
2070                 ring->pipe = i / 8;
2071                 ring->queue = i % 8;
2072                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2073                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2074                 /* type-2 packets are deprecated on MEC, use type-3 instead */
2075                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2076                                      irq_type);
2077                 if (r)
2078                         return r;
2079         }
2080
2081         /* reserve GDS, GWS and OA resource for gfx */
2082         r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2083                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2084                                     &adev->gds.gds_gfx_bo, NULL, NULL);
2085         if (r)
2086                 return r;
2087
2088         r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2089                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2090                                     &adev->gds.gws_gfx_bo, NULL, NULL);
2091         if (r)
2092                 return r;
2093
2094         r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2095                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2096                                     &adev->gds.oa_gfx_bo, NULL, NULL);
2097         if (r)
2098                 return r;
2099
2100         adev->gfx.ce_ram_size = 0x8000;
2101
2102         r = gfx_v8_0_gpu_early_init(adev);
2103         if (r)
2104                 return r;
2105
2106         return 0;
2107 }
2108
2109 static int gfx_v8_0_sw_fini(void *handle)
2110 {
2111         int i;
2112         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2113
2114         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2115         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2116         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2117
2118         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2119                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2120         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2121                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2122
2123         gfx_v8_0_mec_fini(adev);
2124         gfx_v8_0_rlc_fini(adev);
2125         gfx_v8_0_free_microcode(adev);
2126
2127         return 0;
2128 }
2129
2130 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2131 {
2132         uint32_t *modearray, *mod2array;
2133         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2134         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2135         u32 reg_offset;
2136
2137         modearray = adev->gfx.config.tile_mode_array;
2138         mod2array = adev->gfx.config.macrotile_mode_array;
2139
2140         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2141                 modearray[reg_offset] = 0;
2142
2143         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2144                 mod2array[reg_offset] = 0;
2145
2146         switch (adev->asic_type) {
2147         case CHIP_TOPAZ:
2148                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2149                                 PIPE_CONFIG(ADDR_SURF_P2) |
2150                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2151                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2152                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153                                 PIPE_CONFIG(ADDR_SURF_P2) |
2154                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2155                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2156                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157                                 PIPE_CONFIG(ADDR_SURF_P2) |
2158                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2159                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2160                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2161                                 PIPE_CONFIG(ADDR_SURF_P2) |
2162                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2163                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2164                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165                                 PIPE_CONFIG(ADDR_SURF_P2) |
2166                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2167                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2168                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2169                                 PIPE_CONFIG(ADDR_SURF_P2) |
2170                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2171                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2172                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173                                 PIPE_CONFIG(ADDR_SURF_P2) |
2174                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2175                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2176                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2177                                 PIPE_CONFIG(ADDR_SURF_P2));
2178                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2179                                 PIPE_CONFIG(ADDR_SURF_P2) |
2180                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2181                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2182                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2183                                  PIPE_CONFIG(ADDR_SURF_P2) |
2184                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2185                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2187                                  PIPE_CONFIG(ADDR_SURF_P2) |
2188                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2189                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2190                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2191                                  PIPE_CONFIG(ADDR_SURF_P2) |
2192                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2193                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2194                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2195                                  PIPE_CONFIG(ADDR_SURF_P2) |
2196                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2197                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2198                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2199                                  PIPE_CONFIG(ADDR_SURF_P2) |
2200                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2202                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2203                                  PIPE_CONFIG(ADDR_SURF_P2) |
2204                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2205                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2206                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2207                                  PIPE_CONFIG(ADDR_SURF_P2) |
2208                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2209                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2210                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2211                                  PIPE_CONFIG(ADDR_SURF_P2) |
2212                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2213                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2214                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2215                                  PIPE_CONFIG(ADDR_SURF_P2) |
2216                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2217                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2218                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2219                                  PIPE_CONFIG(ADDR_SURF_P2) |
2220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2222                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2223                                  PIPE_CONFIG(ADDR_SURF_P2) |
2224                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2225                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2226                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2227                                  PIPE_CONFIG(ADDR_SURF_P2) |
2228                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2229                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2230                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2231                                  PIPE_CONFIG(ADDR_SURF_P2) |
2232                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2233                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2234                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2235                                  PIPE_CONFIG(ADDR_SURF_P2) |
2236                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2237                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2238                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2239                                  PIPE_CONFIG(ADDR_SURF_P2) |
2240                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2241                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2242                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243                                  PIPE_CONFIG(ADDR_SURF_P2) |
2244                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2245                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2246                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2247                                  PIPE_CONFIG(ADDR_SURF_P2) |
2248                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2249                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2250
2251                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2252                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2253                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2254                                 NUM_BANKS(ADDR_SURF_8_BANK));
2255                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2256                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2257                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258                                 NUM_BANKS(ADDR_SURF_8_BANK));
2259                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2260                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2261                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2262                                 NUM_BANKS(ADDR_SURF_8_BANK));
2263                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2264                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2265                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2266                                 NUM_BANKS(ADDR_SURF_8_BANK));
2267                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2269                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2270                                 NUM_BANKS(ADDR_SURF_8_BANK));
2271                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2273                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2274                                 NUM_BANKS(ADDR_SURF_8_BANK));
2275                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2276                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2277                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2278                                 NUM_BANKS(ADDR_SURF_8_BANK));
2279                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2280                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2281                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2282                                 NUM_BANKS(ADDR_SURF_16_BANK));
2283                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2284                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2285                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2286                                 NUM_BANKS(ADDR_SURF_16_BANK));
2287                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2288                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2289                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290                                  NUM_BANKS(ADDR_SURF_16_BANK));
2291                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2292                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2293                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294                                  NUM_BANKS(ADDR_SURF_16_BANK));
2295                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2296                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2297                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2298                                  NUM_BANKS(ADDR_SURF_16_BANK));
2299                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2300                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2301                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2302                                  NUM_BANKS(ADDR_SURF_16_BANK));
2303                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2305                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2306                                  NUM_BANKS(ADDR_SURF_8_BANK));
2307
2308                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2309                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2310                             reg_offset != 23)
2311                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2312
2313                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2314                         if (reg_offset != 7)
2315                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2316
2317                 break;
2318         case CHIP_FIJI:
2319                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2322                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2323                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2326                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2330                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2334                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2338                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2340                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2342                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2343                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2344                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2346                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2348                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2349                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2350                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2351                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2352                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2353                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2354                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2356                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2357                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2361                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2362                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2365                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2366                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2369                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2370                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2377                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2378                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2382                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2384                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2385                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2386                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2387                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2389                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2390                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2392                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2393                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2394                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2396                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2398                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2402                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2404                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2406                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2410                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2411                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2412                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2414                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2416                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2417                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2418                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2420                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2421                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2422                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2424                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2425                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2428                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2434                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2437                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2438                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2439                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2440                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441
2442                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2444                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445                                 NUM_BANKS(ADDR_SURF_8_BANK));
2446                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449                                 NUM_BANKS(ADDR_SURF_8_BANK));
2450                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453                                 NUM_BANKS(ADDR_SURF_8_BANK));
2454                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2456                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2457                                 NUM_BANKS(ADDR_SURF_8_BANK));
2458                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2460                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2461                                 NUM_BANKS(ADDR_SURF_8_BANK));
2462                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465                                 NUM_BANKS(ADDR_SURF_8_BANK));
2466                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469                                 NUM_BANKS(ADDR_SURF_8_BANK));
2470                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2472                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2473                                 NUM_BANKS(ADDR_SURF_8_BANK));
2474                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2476                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2477                                 NUM_BANKS(ADDR_SURF_8_BANK));
2478                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2480                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2481                                  NUM_BANKS(ADDR_SURF_8_BANK));
2482                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2484                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2485                                  NUM_BANKS(ADDR_SURF_8_BANK));
2486                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2488                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489                                  NUM_BANKS(ADDR_SURF_8_BANK));
2490                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2493                                  NUM_BANKS(ADDR_SURF_8_BANK));
2494                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2497                                  NUM_BANKS(ADDR_SURF_4_BANK));
2498
2499                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2500                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2501
2502                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2503                         if (reg_offset != 7)
2504                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2505
2506                 break;
2507         case CHIP_TONGA:
2508                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2510                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2511                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2512                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2515                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2516                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2519                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2520                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2523                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2524                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2527                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2528                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2529                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2531                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2533                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2535                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2537                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2538                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2539                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2540                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2541                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2542                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2543                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2547                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2549                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2550                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2551                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2553                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2554                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2555                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2556                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2557                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2558                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2559                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2562                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2566                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2567                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2573                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2574                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2575                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2576                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2578                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2579                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2582                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2583                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2585                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2586                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2587                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2589                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2590                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2591                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2593                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2594                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2595                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2597                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2598                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2599                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2600                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2601                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2602                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2603                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2605                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2606                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2607                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2609                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2610                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2611                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2613                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2614                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2615                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2617                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2618                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2621                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2623                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2625                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2626                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2627                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2628                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2629                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2630
2631                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2633                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2634                                 NUM_BANKS(ADDR_SURF_16_BANK));
2635                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2637                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638                                 NUM_BANKS(ADDR_SURF_16_BANK));
2639                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642                                 NUM_BANKS(ADDR_SURF_16_BANK));
2643                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2645                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2646                                 NUM_BANKS(ADDR_SURF_16_BANK));
2647                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2649                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2650                                 NUM_BANKS(ADDR_SURF_16_BANK));
2651                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654                                 NUM_BANKS(ADDR_SURF_16_BANK));
2655                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658                                 NUM_BANKS(ADDR_SURF_16_BANK));
2659                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2661                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2662                                 NUM_BANKS(ADDR_SURF_16_BANK));
2663                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2665                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2666                                 NUM_BANKS(ADDR_SURF_16_BANK));
2667                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2669                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2670                                  NUM_BANKS(ADDR_SURF_16_BANK));
2671                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2673                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2674                                  NUM_BANKS(ADDR_SURF_16_BANK));
2675                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2676                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2677                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2678                                  NUM_BANKS(ADDR_SURF_8_BANK));
2679                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2682                                  NUM_BANKS(ADDR_SURF_4_BANK));
2683                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2684                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2685                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2686                                  NUM_BANKS(ADDR_SURF_4_BANK));
2687
2688                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2689                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2690
2691                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2692                         if (reg_offset != 7)
2693                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2694
2695                 break;
2696         case CHIP_POLARIS11:
2697         case CHIP_POLARIS12:
2698                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2701                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2702                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2703                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2705                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2706                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2707                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2709                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2710                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2711                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2713                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2714                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2715                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2717                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2718                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2719                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2721                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2722                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2723                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2725                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2726                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2729                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2730                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2731                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2732                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2733                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2735                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2737                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2738                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2739                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2740                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2741                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2742                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2743                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2744                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2745                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2747                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2748                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2749                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2751                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2752                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2754                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2755                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2756                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2757                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2759                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2760                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2761                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2762                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2763                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2764                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2765                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2766                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2767                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2768                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2769                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2771                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2772                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2773                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2774                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2775                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2776                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2777                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2779                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2780                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2781                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2783                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2784                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2785                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2786                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2787                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2788                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2789                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2791                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2792                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2793                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2795                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2796                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2797                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2798                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2799                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2800                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2801                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2802                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2803                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2804                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2805                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2807                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2808                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2810                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2811                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2812                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2813                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2814                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2815                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2816                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2817                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2818                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2819                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2820
2821                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2823                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824                                 NUM_BANKS(ADDR_SURF_16_BANK));
2825
2826                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2828                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2829                                 NUM_BANKS(ADDR_SURF_16_BANK));
2830
2831                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2833                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2834                                 NUM_BANKS(ADDR_SURF_16_BANK));
2835
2836                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2838                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839                                 NUM_BANKS(ADDR_SURF_16_BANK));
2840
2841                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2844                                 NUM_BANKS(ADDR_SURF_16_BANK));
2845
2846                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2847                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2848                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2849                                 NUM_BANKS(ADDR_SURF_16_BANK));
2850
2851                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2852                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2853                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2854                                 NUM_BANKS(ADDR_SURF_16_BANK));
2855
2856                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2857                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2858                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2859                                 NUM_BANKS(ADDR_SURF_16_BANK));
2860
2861                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2862                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2863                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2864                                 NUM_BANKS(ADDR_SURF_16_BANK));
2865
2866                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2867                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2868                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2869                                 NUM_BANKS(ADDR_SURF_16_BANK));
2870
2871                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2872                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2873                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2874                                 NUM_BANKS(ADDR_SURF_16_BANK));
2875
2876                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879                                 NUM_BANKS(ADDR_SURF_16_BANK));
2880
2881                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2882                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2883                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2884                                 NUM_BANKS(ADDR_SURF_8_BANK));
2885
2886                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2887                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2888                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2889                                 NUM_BANKS(ADDR_SURF_4_BANK));
2890
2891                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2892                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2893
2894                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2895                         if (reg_offset != 7)
2896                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2897
2898                 break;
2899         case CHIP_POLARIS10:
2900                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2903                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2907                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2908                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2911                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2912                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2915                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2916                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2919                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2920                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2921                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2923                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2924                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2925                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2926                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2927                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2929                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2930                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2931                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2932                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2933                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2934                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2935                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2936                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2937                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2939                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2940                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2941                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2943                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2944                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2945                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2946                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2947                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2948                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2949                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2950                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2951                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2952                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2955                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2956                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2959                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2960                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2961                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2962                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2964                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2966                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2967                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2968                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2969                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2970                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2971                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2972                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2973                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2974                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2975                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2976                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2977                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2978                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2979                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2981                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2982                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2983                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2984                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2985                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2986                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2987                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2988                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2989                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2990                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2991                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2992                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2993                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2994                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2995                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2996                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2997                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2998                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2999                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3000                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3001                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3002                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3003                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3004                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3005                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3006                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3007                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3008                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3009                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3011                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3012                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3013                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3014                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3015                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3016                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3017                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3018                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3019                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3020                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3021                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3022
3023                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3025                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026                                 NUM_BANKS(ADDR_SURF_16_BANK));
3027
3028                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3030                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031                                 NUM_BANKS(ADDR_SURF_16_BANK));
3032
3033                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3035                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3036                                 NUM_BANKS(ADDR_SURF_16_BANK));
3037
3038                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3040                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3041                                 NUM_BANKS(ADDR_SURF_16_BANK));
3042
3043                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3045                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3046                                 NUM_BANKS(ADDR_SURF_16_BANK));
3047
3048                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3049                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3050                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3051                                 NUM_BANKS(ADDR_SURF_16_BANK));
3052
3053                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3054                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3055                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3056                                 NUM_BANKS(ADDR_SURF_16_BANK));
3057
3058                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3060                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3061                                 NUM_BANKS(ADDR_SURF_16_BANK));
3062
3063                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3064                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3065                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3066                                 NUM_BANKS(ADDR_SURF_16_BANK));
3067
3068                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3070                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3071                                 NUM_BANKS(ADDR_SURF_16_BANK));
3072
3073                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3075                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3076                                 NUM_BANKS(ADDR_SURF_16_BANK));
3077
3078                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3079                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3080                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3081                                 NUM_BANKS(ADDR_SURF_8_BANK));
3082
3083                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3085                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3086                                 NUM_BANKS(ADDR_SURF_4_BANK));
3087
3088                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3090                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3091                                 NUM_BANKS(ADDR_SURF_4_BANK));
3092
3093                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3094                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3095
3096                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3097                         if (reg_offset != 7)
3098                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3099
3100                 break;
3101         case CHIP_STONEY:
3102                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3103                                 PIPE_CONFIG(ADDR_SURF_P2) |
3104                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3105                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3106                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3107                                 PIPE_CONFIG(ADDR_SURF_P2) |
3108                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3109                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3110                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3111                                 PIPE_CONFIG(ADDR_SURF_P2) |
3112                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3113                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3114                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115                                 PIPE_CONFIG(ADDR_SURF_P2) |
3116                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3117                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3118                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3119                                 PIPE_CONFIG(ADDR_SURF_P2) |
3120                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3121                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3122                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3123                                 PIPE_CONFIG(ADDR_SURF_P2) |
3124                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3125                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3126                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3127                                 PIPE_CONFIG(ADDR_SURF_P2) |
3128                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3129                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3130                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3131                                 PIPE_CONFIG(ADDR_SURF_P2));
3132                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3133                                 PIPE_CONFIG(ADDR_SURF_P2) |
3134                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3135                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3136                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3137                                  PIPE_CONFIG(ADDR_SURF_P2) |
3138                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3139                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3140                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3141                                  PIPE_CONFIG(ADDR_SURF_P2) |
3142                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3143                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3144                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3145                                  PIPE_CONFIG(ADDR_SURF_P2) |
3146                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3147                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3148                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3149                                  PIPE_CONFIG(ADDR_SURF_P2) |
3150                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3151                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3152                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3153                                  PIPE_CONFIG(ADDR_SURF_P2) |
3154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3156                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3157                                  PIPE_CONFIG(ADDR_SURF_P2) |
3158                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3159                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3160                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3161                                  PIPE_CONFIG(ADDR_SURF_P2) |
3162                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3163                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3164                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3165                                  PIPE_CONFIG(ADDR_SURF_P2) |
3166                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3167                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3168                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3169                                  PIPE_CONFIG(ADDR_SURF_P2) |
3170                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3171                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3172                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3173                                  PIPE_CONFIG(ADDR_SURF_P2) |
3174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3176                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3177                                  PIPE_CONFIG(ADDR_SURF_P2) |
3178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3180                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3181                                  PIPE_CONFIG(ADDR_SURF_P2) |
3182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3184                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3185                                  PIPE_CONFIG(ADDR_SURF_P2) |
3186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3188                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3189                                  PIPE_CONFIG(ADDR_SURF_P2) |
3190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3192                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3193                                  PIPE_CONFIG(ADDR_SURF_P2) |
3194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3196                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3197                                  PIPE_CONFIG(ADDR_SURF_P2) |
3198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3200                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3201                                  PIPE_CONFIG(ADDR_SURF_P2) |
3202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3204
3205                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3206                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3207                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3208                                 NUM_BANKS(ADDR_SURF_8_BANK));
3209                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3210                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3211                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3212                                 NUM_BANKS(ADDR_SURF_8_BANK));
3213                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3214                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3215                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3216                                 NUM_BANKS(ADDR_SURF_8_BANK));
3217                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3218                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3219                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3220                                 NUM_BANKS(ADDR_SURF_8_BANK));
3221                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3222                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3223                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3224                                 NUM_BANKS(ADDR_SURF_8_BANK));
3225                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3226                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3227                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3228                                 NUM_BANKS(ADDR_SURF_8_BANK));
3229                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3230                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3231                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3232                                 NUM_BANKS(ADDR_SURF_8_BANK));
3233                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3234                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3235                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3236                                 NUM_BANKS(ADDR_SURF_16_BANK));
3237                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3238                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3239                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3240                                 NUM_BANKS(ADDR_SURF_16_BANK));
3241                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3242                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3243                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3244                                  NUM_BANKS(ADDR_SURF_16_BANK));
3245                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3246                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3247                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248                                  NUM_BANKS(ADDR_SURF_16_BANK));
3249                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3250                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3251                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3252                                  NUM_BANKS(ADDR_SURF_16_BANK));
3253                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3254                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3255                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3256                                  NUM_BANKS(ADDR_SURF_16_BANK));
3257                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3259                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3260                                  NUM_BANKS(ADDR_SURF_8_BANK));
3261
3262                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3263                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3264                             reg_offset != 23)
3265                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3266
3267                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3268                         if (reg_offset != 7)
3269                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3270
3271                 break;
3272         default:
3273                 dev_warn(adev->dev,
3274                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3275                          adev->asic_type);
3276
3277         case CHIP_CARRIZO:
3278                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3279                                 PIPE_CONFIG(ADDR_SURF_P2) |
3280                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3281                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3282                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3283                                 PIPE_CONFIG(ADDR_SURF_P2) |
3284                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3285                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3286                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3287                                 PIPE_CONFIG(ADDR_SURF_P2) |
3288                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3289                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3290                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3291                                 PIPE_CONFIG(ADDR_SURF_P2) |
3292                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3293                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3294                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3295                                 PIPE_CONFIG(ADDR_SURF_P2) |
3296                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3297                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3298                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3299                                 PIPE_CONFIG(ADDR_SURF_P2) |
3300                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3301                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3302                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3303                                 PIPE_CONFIG(ADDR_SURF_P2) |
3304                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3305                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3306                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3307                                 PIPE_CONFIG(ADDR_SURF_P2));
3308                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3309                                 PIPE_CONFIG(ADDR_SURF_P2) |
3310                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3311                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3312                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3313                                  PIPE_CONFIG(ADDR_SURF_P2) |
3314                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3315                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3316                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3317                                  PIPE_CONFIG(ADDR_SURF_P2) |
3318                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3319                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3320                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3321                                  PIPE_CONFIG(ADDR_SURF_P2) |
3322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3324                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3325                                  PIPE_CONFIG(ADDR_SURF_P2) |
3326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3328                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3329                                  PIPE_CONFIG(ADDR_SURF_P2) |
3330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3332                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3333                                  PIPE_CONFIG(ADDR_SURF_P2) |
3334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3336                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3337                                  PIPE_CONFIG(ADDR_SURF_P2) |
3338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3341                                  PIPE_CONFIG(ADDR_SURF_P2) |
3342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3344                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3345                                  PIPE_CONFIG(ADDR_SURF_P2) |
3346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3348                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3349                                  PIPE_CONFIG(ADDR_SURF_P2) |
3350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3352                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3353                                  PIPE_CONFIG(ADDR_SURF_P2) |
3354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3356                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3357                                  PIPE_CONFIG(ADDR_SURF_P2) |
3358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3360                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3361                                  PIPE_CONFIG(ADDR_SURF_P2) |
3362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3364                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3365                                  PIPE_CONFIG(ADDR_SURF_P2) |
3366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3368                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3369                                  PIPE_CONFIG(ADDR_SURF_P2) |
3370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3372                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3373                                  PIPE_CONFIG(ADDR_SURF_P2) |
3374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3376                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3377                                  PIPE_CONFIG(ADDR_SURF_P2) |
3378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3380
3381                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3382                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3383                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384                                 NUM_BANKS(ADDR_SURF_8_BANK));
3385                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3386                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3387                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388                                 NUM_BANKS(ADDR_SURF_8_BANK));
3389                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3391                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3392                                 NUM_BANKS(ADDR_SURF_8_BANK));
3393                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3396                                 NUM_BANKS(ADDR_SURF_8_BANK));
3397                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3399                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3400                                 NUM_BANKS(ADDR_SURF_8_BANK));
3401                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3402                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3403                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3404                                 NUM_BANKS(ADDR_SURF_8_BANK));
3405                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3406                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3407                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3408                                 NUM_BANKS(ADDR_SURF_8_BANK));
3409                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3410                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3411                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3412                                 NUM_BANKS(ADDR_SURF_16_BANK));
3413                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3414                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3415                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3416                                 NUM_BANKS(ADDR_SURF_16_BANK));
3417                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3418                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3419                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3420                                  NUM_BANKS(ADDR_SURF_16_BANK));
3421                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3422                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3423                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3424                                  NUM_BANKS(ADDR_SURF_16_BANK));
3425                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3426                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3427                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3428                                  NUM_BANKS(ADDR_SURF_16_BANK));
3429                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3430                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3431                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3432                                  NUM_BANKS(ADDR_SURF_16_BANK));
3433                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3434                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3435                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3436                                  NUM_BANKS(ADDR_SURF_8_BANK));
3437
3438                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3439                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3440                             reg_offset != 23)
3441                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3442
3443                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3444                         if (reg_offset != 7)
3445                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3446
3447                 break;
3448         }
3449 }
3450
3451 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3452                                   u32 se_num, u32 sh_num, u32 instance)
3453 {
3454         u32 data;
3455
3456         if (instance == 0xffffffff)
3457                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3458         else
3459                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3460
3461         if (se_num == 0xffffffff)
3462                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3463         else
3464                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3465
3466         if (sh_num == 0xffffffff)
3467                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3468         else
3469                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3470
3471         WREG32(mmGRBM_GFX_INDEX, data);
3472 }
3473
/*
 * Return a mask with the low @bit_width bits set.
 *
 * Widths of 32 or more saturate to 0xffffffff.  The explicit clamp matters:
 * shifting by a count that is greater than or equal to the width of the
 * shifted type is undefined behaviour in C, so an unclamped 64-bit shift
 * would be undefined for @bit_width >= 64.
 */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	if (bit_width >= 32)
		return 0xffffffff;

	return (1U << bit_width) - 1;
}
3478
3479 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3480 {
3481         u32 data, mask;
3482
3483         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3484                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3485
3486         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3487
3488         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3489                                        adev->gfx.config.max_sh_per_se);
3490
3491         return (~data) & mask;
3492 }
3493
3494 static void
3495 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3496 {
3497         switch (adev->asic_type) {
3498         case CHIP_FIJI:
3499                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3500                           RB_XSEL2(1) | PKR_MAP(2) |
3501                           PKR_XSEL(1) | PKR_YSEL(1) |
3502                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3503                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3504                            SE_PAIR_YSEL(2);
3505                 break;
3506         case CHIP_TONGA:
3507         case CHIP_POLARIS10:
3508                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3509                           SE_XSEL(1) | SE_YSEL(1);
3510                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3511                            SE_PAIR_YSEL(2);
3512                 break;
3513         case CHIP_TOPAZ:
3514         case CHIP_CARRIZO:
3515                 *rconf |= RB_MAP_PKR0(2);
3516                 *rconf1 |= 0x0;
3517                 break;
3518         case CHIP_POLARIS11:
3519         case CHIP_POLARIS12:
3520                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3521                           SE_XSEL(1) | SE_YSEL(1);
3522                 *rconf1 |= 0x0;
3523                 break;
3524         case CHIP_STONEY:
3525                 *rconf |= 0x0;
3526                 *rconf1 |= 0x0;
3527                 break;
3528         default:
3529                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3530                 break;
3531         }
3532 }
3533
3534 static void
3535 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3536                                         u32 raster_config, u32 raster_config_1,
3537                                         unsigned rb_mask, unsigned num_rb)
3538 {
3539         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3540         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3541         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3542         unsigned rb_per_se = num_rb / num_se;
3543         unsigned se_mask[4];
3544         unsigned se;
3545
3546         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3547         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3548         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3549         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3550
3551         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3552         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3553         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3554
3555         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3556                              (!se_mask[2] && !se_mask[3]))) {
3557                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3558
3559                 if (!se_mask[0] && !se_mask[1]) {
3560                         raster_config_1 |=
3561                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3562                 } else {
3563                         raster_config_1 |=
3564                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3565                 }
3566         }
3567
3568         for (se = 0; se < num_se; se++) {
3569                 unsigned raster_config_se = raster_config;
3570                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3571                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3572                 int idx = (se / 2) * 2;
3573
3574                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3575                         raster_config_se &= ~SE_MAP_MASK;
3576
3577                         if (!se_mask[idx]) {
3578                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3579                         } else {
3580                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3581                         }
3582                 }
3583
3584                 pkr0_mask &= rb_mask;
3585                 pkr1_mask &= rb_mask;
3586                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3587                         raster_config_se &= ~PKR_MAP_MASK;
3588
3589                         if (!pkr0_mask) {
3590                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3591                         } else {
3592                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3593                         }
3594                 }
3595
3596                 if (rb_per_se >= 2) {
3597                         unsigned rb0_mask = 1 << (se * rb_per_se);
3598                         unsigned rb1_mask = rb0_mask << 1;
3599
3600                         rb0_mask &= rb_mask;
3601                         rb1_mask &= rb_mask;
3602                         if (!rb0_mask || !rb1_mask) {
3603                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3604
3605                                 if (!rb0_mask) {
3606                                         raster_config_se |=
3607                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3608                                 } else {
3609                                         raster_config_se |=
3610                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3611                                 }
3612                         }
3613
3614                         if (rb_per_se > 2) {
3615                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3616                                 rb1_mask = rb0_mask << 1;
3617                                 rb0_mask &= rb_mask;
3618                                 rb1_mask &= rb_mask;
3619                                 if (!rb0_mask || !rb1_mask) {
3620                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3621
3622                                         if (!rb0_mask) {
3623                                                 raster_config_se |=
3624                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3625                                         } else {
3626                                                 raster_config_se |=
3627                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3628                                         }
3629                                 }
3630                         }
3631                 }
3632
3633                 /* GRBM_GFX_INDEX has a different offset on VI */
3634                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3635                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3636                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3637         }
3638
3639         /* GRBM_GFX_INDEX has a different offset on VI */
3640         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3641 }
3642
3643 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3644 {
3645         int i, j;
3646         u32 data;
3647         u32 raster_config = 0, raster_config_1 = 0;
3648         u32 active_rbs = 0;
3649         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3650                                         adev->gfx.config.max_sh_per_se;
3651         unsigned num_rb_pipes;
3652
3653         mutex_lock(&adev->grbm_idx_mutex);
3654         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3655                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3656                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3657                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3658                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3659                                                rb_bitmap_width_per_sh);
3660                 }
3661         }
3662         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3663
3664         adev->gfx.config.backend_enable_mask = active_rbs;
3665         adev->gfx.config.num_rbs = hweight32(active_rbs);
3666
3667         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3668                              adev->gfx.config.max_shader_engines, 16);
3669
3670         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3671
3672         if (!adev->gfx.config.backend_enable_mask ||
3673                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3674                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3675                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3676         } else {
3677                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3678                                                         adev->gfx.config.backend_enable_mask,
3679                                                         num_rb_pipes);
3680         }
3681
3682         /* cache the values for userspace */
3683         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3684                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3685                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3686                         adev->gfx.config.rb_config[i][j].rb_backend_disable =
3687                                 RREG32(mmCC_RB_BACKEND_DISABLE);
3688                         adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3689                                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3690                         adev->gfx.config.rb_config[i][j].raster_config =
3691                                 RREG32(mmPA_SC_RASTER_CONFIG);
3692                         adev->gfx.config.rb_config[i][j].raster_config_1 =
3693                                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3694                 }
3695         }
3696         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3697         mutex_unlock(&adev->grbm_idx_mutex);
3698 }
3699
3700 /**
3701  * gfx_v8_0_init_compute_vmid - gart enable
3702  *
3703  * @rdev: amdgpu_device pointer
3704  *
3705  * Initialize compute vmid sh_mem registers
3706  *
3707  */
3708 #define DEFAULT_SH_MEM_BASES    (0x6000)
3709 #define FIRST_COMPUTE_VMID      (8)
3710 #define LAST_COMPUTE_VMID       (16)
3711 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3712 {
3713         int i;
3714         uint32_t sh_mem_config;
3715         uint32_t sh_mem_bases;
3716
3717         /*
3718          * Configure apertures:
3719          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3720          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3721          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3722          */
3723         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3724
3725         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3726                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3727                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3728                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3729                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3730                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3731
3732         mutex_lock(&adev->srbm_mutex);
3733         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3734                 vi_srbm_select(adev, 0, 0, 0, i);
3735                 /* CP and shaders */
3736                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3737                 WREG32(mmSH_MEM_APE1_BASE, 1);
3738                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3739                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3740         }
3741         vi_srbm_select(adev, 0, 0, 0, 0);
3742         mutex_unlock(&adev->srbm_mutex);
3743 }
3744
3745 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3746 {
3747         u32 tmp;
3748         int i;
3749
3750         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3751         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3752         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3753         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3754
3755         gfx_v8_0_tiling_mode_table_init(adev);
3756         gfx_v8_0_setup_rb(adev);
3757         gfx_v8_0_get_cu_info(adev);
3758
3759         /* XXX SH_MEM regs */
3760         /* where to put LDS, scratch, GPUVM in FSA64 space */
3761         mutex_lock(&adev->srbm_mutex);
3762         for (i = 0; i < 16; i++) {
3763                 vi_srbm_select(adev, 0, 0, 0, i);
3764                 /* CP and shaders */
3765                 if (i == 0) {
3766                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3767                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3768                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3769                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3770                         WREG32(mmSH_MEM_CONFIG, tmp);
3771                 } else {
3772                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3773                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3774                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3775                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3776                         WREG32(mmSH_MEM_CONFIG, tmp);
3777                 }
3778
3779                 WREG32(mmSH_MEM_APE1_BASE, 1);
3780                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3781                 WREG32(mmSH_MEM_BASES, 0);
3782         }
3783         vi_srbm_select(adev, 0, 0, 0, 0);
3784         mutex_unlock(&adev->srbm_mutex);
3785
3786         gfx_v8_0_init_compute_vmid(adev);
3787
3788         mutex_lock(&adev->grbm_idx_mutex);
3789         /*
3790          * making sure that the following register writes will be broadcasted
3791          * to all the shaders
3792          */
3793         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3794
3795         WREG32(mmPA_SC_FIFO_SIZE,
3796                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3797                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3798                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3799                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3800                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3801                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3802                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3803                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3804         mutex_unlock(&adev->grbm_idx_mutex);
3805
3806 }
3807
3808 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3809 {
3810         u32 i, j, k;
3811         u32 mask;
3812
3813         mutex_lock(&adev->grbm_idx_mutex);
3814         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3815                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3816                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3817                         for (k = 0; k < adev->usec_timeout; k++) {
3818                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3819                                         break;
3820                                 udelay(1);
3821                         }
3822                 }
3823         }
3824         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3825         mutex_unlock(&adev->grbm_idx_mutex);
3826
3827         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3828                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3829                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3830                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3831         for (k = 0; k < adev->usec_timeout; k++) {
3832                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3833                         break;
3834                 udelay(1);
3835         }
3836 }
3837
3838 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3839                                                bool enable)
3840 {
3841         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3842
3843         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3844         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3845         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3846         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3847
3848         WREG32(mmCP_INT_CNTL_RING0, tmp);
3849 }
3850
3851 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3852 {
3853         /* csib */
3854         WREG32(mmRLC_CSIB_ADDR_HI,
3855                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3856         WREG32(mmRLC_CSIB_ADDR_LO,
3857                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3858         WREG32(mmRLC_CSIB_LENGTH,
3859                         adev->gfx.rlc.clear_state_size);
3860 }
3861
/*
 * Walk the indirect part of a register list format buffer in place.
 *
 * @register_list_format: buffer to parse; modified in place
 * @ind_offset:           index at which parsing starts
 * @list_size:            number of ints in @register_list_format
 * @unique_indices:       out: distinct index values found so far
 * @indices_count:        in/out: number of valid @unique_indices entries
 * @max_indices:          capacity of @unique_indices
 * @ind_start_offsets:    out: start offset of every entry
 * @offset_count:         in/out: number of valid @ind_start_offsets entries
 * @max_offset:           capacity of @ind_start_offsets
 *
 * An entry is a run of three-int records terminated by 0xFFFFFFFF.  The
 * offset of the first int of each entry is appended to @ind_start_offsets.
 * The third int of every record is looked up in @unique_indices (appended
 * if new) and then overwritten with its position in that table.
 *
 * BUG()s as soon as either counter reaches its capacity.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	bool at_entry_start = true;
	int slot;

	while (ind_offset < list_size) {
		if (at_entry_start) {
			ind_start_offsets[*offset_count] = ind_offset;
			(*offset_count)++;
			BUG_ON(*offset_count >= max_offset);
			at_entry_start = false;
		}

		/* terminator: the next int begins a new entry */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			at_entry_start = true;
			ind_offset++;
			continue;
		}

		/* skip to the third int of the record, which holds the index */
		ind_offset += 2;

		/* look for the matching index */
		slot = 0;
		while (slot < *indices_count &&
		       unique_indices[slot] != register_list_format[ind_offset])
			slot++;

		/* not seen before: append it to the table */
		if (slot >= *indices_count) {
			slot = *indices_count;
			unique_indices[slot] = register_list_format[ind_offset];
			*indices_count = slot + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = slot;
		ind_offset++;
	}
}
3911
3912 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3913 {
3914         int i, temp, data;
3915         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3916         int indices_count = 0;
3917         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3918         int offset_count = 0;
3919
3920         int list_size;
3921         unsigned int *register_list_format =
3922                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3923         if (!register_list_format)
3924                 return -ENOMEM;
3925         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3926                         adev->gfx.rlc.reg_list_format_size_bytes);
3927
3928         gfx_v8_0_parse_ind_reg_list(register_list_format,
3929                                 RLC_FormatDirectRegListLength,
3930                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3931                                 unique_indices,
3932                                 &indices_count,
3933                                 sizeof(unique_indices) / sizeof(int),
3934                                 indirect_start_offsets,
3935                                 &offset_count,
3936                                 sizeof(indirect_start_offsets)/sizeof(int));
3937
3938         /* save and restore list */
3939         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3940
3941         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3942         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3943                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3944
3945         /* indirect list */
3946         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3947         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3948                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3949
3950         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3951         list_size = list_size >> 1;
3952         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3953         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3954
3955         /* starting offsets starts */
3956         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3957                 adev->gfx.rlc.starting_offsets_start);
3958         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3959                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3960                                 indirect_start_offsets[i]);
3961
3962         /* unique indices */
3963         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3964         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3965         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3966                 if (unique_indices[i] != 0) {
3967                         amdgpu_mm_wreg(adev, temp + i,
3968                                         unique_indices[i] & 0x3FFFF, false);
3969                         amdgpu_mm_wreg(adev, data + i,
3970                                         unique_indices[i] >> 20, false);
3971                 }
3972         }
3973         kfree(register_list_format);
3974
3975         return 0;
3976 }
3977
3978 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3979 {
3980         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3981 }
3982
3983 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3984 {
3985         uint32_t data;
3986
3987         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3988
3989         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3990         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3991         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3992         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3993         WREG32(mmRLC_PG_DELAY, data);
3994
3995         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3996         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
3997
3998 }
3999
4000 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4001                                                 bool enable)
4002 {
4003         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4004 }
4005
4006 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4007                                                   bool enable)
4008 {
4009         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4010 }
4011
4012 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4013 {
4014         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4015 }
4016
4017 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4018 {
4019         if ((adev->asic_type == CHIP_CARRIZO) ||
4020             (adev->asic_type == CHIP_STONEY)) {
4021                 gfx_v8_0_init_csb(adev);
4022                 gfx_v8_0_init_save_restore_list(adev);
4023                 gfx_v8_0_enable_save_restore_machine(adev);
4024                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4025                 gfx_v8_0_init_power_gating(adev);
4026                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4027                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4028                         cz_enable_sck_slow_down_on_power_up(adev, true);
4029                         cz_enable_sck_slow_down_on_power_down(adev, true);
4030                 } else {
4031                         cz_enable_sck_slow_down_on_power_up(adev, false);
4032                         cz_enable_sck_slow_down_on_power_down(adev, false);
4033                 }
4034                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4035                         cz_enable_cp_power_gating(adev, true);
4036                 else
4037                         cz_enable_cp_power_gating(adev, false);
4038         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4039                    (adev->asic_type == CHIP_POLARIS12)) {
4040                 gfx_v8_0_init_csb(adev);
4041                 gfx_v8_0_init_save_restore_list(adev);
4042                 gfx_v8_0_enable_save_restore_machine(adev);
4043                 gfx_v8_0_init_power_gating(adev);
4044         }
4045
4046 }
4047
4048 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4049 {
4050         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4051
4052         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4053         gfx_v8_0_wait_for_rlc_serdes(adev);
4054 }
4055
4056 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4057 {
4058         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4059         udelay(50);
4060
4061         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4062         udelay(50);
4063 }
4064
4065 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4066 {
4067         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4068
4069         /* carrizo do enable cp interrupt after cp inited */
4070         if (!(adev->flags & AMD_IS_APU))
4071                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4072
4073         udelay(50);
4074 }
4075
4076 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4077 {
4078         const struct rlc_firmware_header_v2_0 *hdr;
4079         const __le32 *fw_data;
4080         unsigned i, fw_size;
4081
4082         if (!adev->gfx.rlc_fw)
4083                 return -EINVAL;
4084
4085         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4086         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4087
4088         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4089                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4090         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4091
4092         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4093         for (i = 0; i < fw_size; i++)
4094                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4095         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4096
4097         return 0;
4098 }
4099
4100 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4101 {
4102         int r;
4103         u32 tmp;
4104
4105         gfx_v8_0_rlc_stop(adev);
4106
4107         /* disable CG */
4108         tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4109         tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4110                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4111         WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4112         if (adev->asic_type == CHIP_POLARIS11 ||
4113             adev->asic_type == CHIP_POLARIS10 ||
4114             adev->asic_type == CHIP_POLARIS12) {
4115                 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4116                 tmp &= ~0x3;
4117                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4118         }
4119
4120         /* disable PG */
4121         WREG32(mmRLC_PG_CNTL, 0);
4122
4123         gfx_v8_0_rlc_reset(adev);
4124         gfx_v8_0_init_pg(adev);
4125
4126         if (!adev->pp_enabled) {
4127                 if (!adev->firmware.smu_load) {
4128                         /* legacy rlc firmware loading */
4129                         r = gfx_v8_0_rlc_load_microcode(adev);
4130                         if (r)
4131                                 return r;
4132                 } else {
4133                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4134                                                         AMDGPU_UCODE_ID_RLC_G);
4135                         if (r)
4136                                 return -EINVAL;
4137                 }
4138         }
4139
4140         gfx_v8_0_rlc_start(adev);
4141
4142         return 0;
4143 }
4144
4145 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4146 {
4147         int i;
4148         u32 tmp = RREG32(mmCP_ME_CNTL);
4149
4150         if (enable) {
4151                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4152                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4153                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4154         } else {
4155                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4156                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4157                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4158                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4159                         adev->gfx.gfx_ring[i].ready = false;
4160         }
4161         WREG32(mmCP_ME_CNTL, tmp);
4162         udelay(50);
4163 }
4164
4165 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4166 {
4167         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4168         const struct gfx_firmware_header_v1_0 *ce_hdr;
4169         const struct gfx_firmware_header_v1_0 *me_hdr;
4170         const __le32 *fw_data;
4171         unsigned i, fw_size;
4172
4173         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4174                 return -EINVAL;
4175
4176         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4177                 adev->gfx.pfp_fw->data;
4178         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4179                 adev->gfx.ce_fw->data;
4180         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4181                 adev->gfx.me_fw->data;
4182
4183         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4184         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4185         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4186
4187         gfx_v8_0_cp_gfx_enable(adev, false);
4188
4189         /* PFP */
4190         fw_data = (const __le32 *)
4191                 (adev->gfx.pfp_fw->data +
4192                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4193         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4194         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4195         for (i = 0; i < fw_size; i++)
4196                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4197         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4198
4199         /* CE */
4200         fw_data = (const __le32 *)
4201                 (adev->gfx.ce_fw->data +
4202                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4203         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4204         WREG32(mmCP_CE_UCODE_ADDR, 0);
4205         for (i = 0; i < fw_size; i++)
4206                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4207         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4208
4209         /* ME */
4210         fw_data = (const __le32 *)
4211                 (adev->gfx.me_fw->data +
4212                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4213         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4214         WREG32(mmCP_ME_RAM_WADDR, 0);
4215         for (i = 0; i < fw_size; i++)
4216                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4217         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4218
4219         return 0;
4220 }
4221
4222 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4223 {
4224         u32 count = 0;
4225         const struct cs_section_def *sect = NULL;
4226         const struct cs_extent_def *ext = NULL;
4227
4228         /* begin clear state */
4229         count += 2;
4230         /* context control state */
4231         count += 3;
4232
4233         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4234                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4235                         if (sect->id == SECT_CONTEXT)
4236                                 count += 2 + ext->reg_count;
4237                         else
4238                                 return 0;
4239                 }
4240         }
4241         /* pa_sc_raster_config/pa_sc_raster_config1 */
4242         count += 4;
4243         /* end clear state */
4244         count += 2;
4245         /* clear state */
4246         count += 2;
4247
4248         return count;
4249 }
4250
4251 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4252 {
4253         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4254         const struct cs_section_def *sect = NULL;
4255         const struct cs_extent_def *ext = NULL;
4256         int r, i;
4257
4258         /* init the CP */
4259         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4260         WREG32(mmCP_ENDIAN_SWAP, 0);
4261         WREG32(mmCP_DEVICE_ID, 1);
4262
4263         gfx_v8_0_cp_gfx_enable(adev, true);
4264
4265         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4266         if (r) {
4267                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4268                 return r;
4269         }
4270
4271         /* clear state buffer */
4272         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4273         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4274
4275         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4276         amdgpu_ring_write(ring, 0x80000000);
4277         amdgpu_ring_write(ring, 0x80000000);
4278
4279         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4280                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4281                         if (sect->id == SECT_CONTEXT) {
4282                                 amdgpu_ring_write(ring,
4283                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4284                                                ext->reg_count));
4285                                 amdgpu_ring_write(ring,
4286                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4287                                 for (i = 0; i < ext->reg_count; i++)
4288                                         amdgpu_ring_write(ring, ext->extent[i]);
4289                         }
4290                 }
4291         }
4292
4293         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4294         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4295         switch (adev->asic_type) {
4296         case CHIP_TONGA:
4297         case CHIP_POLARIS10:
4298                 amdgpu_ring_write(ring, 0x16000012);
4299                 amdgpu_ring_write(ring, 0x0000002A);
4300                 break;
4301         case CHIP_POLARIS11:
4302         case CHIP_POLARIS12:
4303                 amdgpu_ring_write(ring, 0x16000012);
4304                 amdgpu_ring_write(ring, 0x00000000);
4305                 break;
4306         case CHIP_FIJI:
4307                 amdgpu_ring_write(ring, 0x3a00161a);
4308                 amdgpu_ring_write(ring, 0x0000002e);
4309                 break;
4310         case CHIP_CARRIZO:
4311                 amdgpu_ring_write(ring, 0x00000002);
4312                 amdgpu_ring_write(ring, 0x00000000);
4313                 break;
4314         case CHIP_TOPAZ:
4315                 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4316                                 0x00000000 : 0x00000002);
4317                 amdgpu_ring_write(ring, 0x00000000);
4318                 break;
4319         case CHIP_STONEY:
4320                 amdgpu_ring_write(ring, 0x00000000);
4321                 amdgpu_ring_write(ring, 0x00000000);
4322                 break;
4323         default:
4324                 BUG();
4325         }
4326
4327         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4328         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4329
4330         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4331         amdgpu_ring_write(ring, 0);
4332
4333         /* init the CE partitions */
4334         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4335         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4336         amdgpu_ring_write(ring, 0x8000);
4337         amdgpu_ring_write(ring, 0x8000);
4338
4339         amdgpu_ring_commit(ring);
4340
4341         return 0;
4342 }
4343
4344 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4345 {
4346         struct amdgpu_ring *ring;
4347         u32 tmp;
4348         u32 rb_bufsz;
4349         u64 rb_addr, rptr_addr, wptr_gpu_addr;
4350         int r;
4351
4352         /* Set the write pointer delay */
4353         WREG32(mmCP_RB_WPTR_DELAY, 0);
4354
4355         /* set the RB to use vmid 0 */
4356         WREG32(mmCP_RB_VMID, 0);
4357
4358         /* Set ring buffer size */
4359         ring = &adev->gfx.gfx_ring[0];
4360         rb_bufsz = order_base_2(ring->ring_size / 8);
4361         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4362         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4363         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4364         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4365 #ifdef __BIG_ENDIAN
4366         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4367 #endif
4368         WREG32(mmCP_RB0_CNTL, tmp);
4369
4370         /* Initialize the ring buffer's read and write pointers */
4371         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4372         ring->wptr = 0;
4373         WREG32(mmCP_RB0_WPTR, ring->wptr);
4374
4375         /* set the wb address wether it's enabled or not */
4376         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4377         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4378         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4379
4380         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4381         WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4382         WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4383         mdelay(1);
4384         WREG32(mmCP_RB0_CNTL, tmp);
4385
4386         rb_addr = ring->gpu_addr >> 8;
4387         WREG32(mmCP_RB0_BASE, rb_addr);
4388         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4389
4390         /* no gfx doorbells on iceland */
4391         if (adev->asic_type != CHIP_TOPAZ) {
4392                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4393                 if (ring->use_doorbell) {
4394                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4395                                             DOORBELL_OFFSET, ring->doorbell_index);
4396                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4397                                             DOORBELL_HIT, 0);
4398                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4399                                             DOORBELL_EN, 1);
4400                 } else {
4401                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4402                                             DOORBELL_EN, 0);
4403                 }
4404                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4405
4406                 if (adev->asic_type == CHIP_TONGA) {
4407                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4408                                             DOORBELL_RANGE_LOWER,
4409                                             AMDGPU_DOORBELL_GFX_RING0);
4410                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4411
4412                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4413                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4414                 }
4415
4416         }
4417
4418         /* start the ring */
4419         gfx_v8_0_cp_gfx_start(adev);
4420         ring->ready = true;
4421         r = amdgpu_ring_test_ring(ring);
4422         if (r)
4423                 ring->ready = false;
4424
4425         return r;
4426 }
4427
4428 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4429 {
4430         int i;
4431
4432         if (enable) {
4433                 WREG32(mmCP_MEC_CNTL, 0);
4434         } else {
4435                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4436                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4437                         adev->gfx.compute_ring[i].ready = false;
4438         }
4439         udelay(50);
4440 }
4441
4442 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4443 {
4444         const struct gfx_firmware_header_v1_0 *mec_hdr;
4445         const __le32 *fw_data;
4446         unsigned i, fw_size;
4447
4448         if (!adev->gfx.mec_fw)
4449                 return -EINVAL;
4450
4451         gfx_v8_0_cp_compute_enable(adev, false);
4452
4453         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4454         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4455
4456         fw_data = (const __le32 *)
4457                 (adev->gfx.mec_fw->data +
4458                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4459         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4460
4461         /* MEC1 */
4462         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4463         for (i = 0; i < fw_size; i++)
4464                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4465         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4466
4467         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4468         if (adev->gfx.mec2_fw) {
4469                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4470
4471                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4472                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4473
4474                 fw_data = (const __le32 *)
4475                         (adev->gfx.mec2_fw->data +
4476                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4477                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4478
4479                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4480                 for (i = 0; i < fw_size; i++)
4481                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4482                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4483         }
4484
4485         return 0;
4486 }
4487
4488 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4489 {
4490         int i, r;
4491
4492         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4493                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4494
4495                 if (ring->mqd_obj) {
4496                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4497                         if (unlikely(r != 0))
4498                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4499
4500                         amdgpu_bo_unpin(ring->mqd_obj);
4501                         amdgpu_bo_unreserve(ring->mqd_obj);
4502
4503                         amdgpu_bo_unref(&ring->mqd_obj);
4504                         ring->mqd_obj = NULL;
4505                 }
4506         }
4507 }
4508
4509 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4510 {
4511         int r, i, j;
4512         u32 tmp;
4513         bool use_doorbell = true;
4514         u64 hqd_gpu_addr;
4515         u64 mqd_gpu_addr;
4516         u64 eop_gpu_addr;
4517         u64 wb_gpu_addr;
4518         u32 *buf;
4519         struct vi_mqd *mqd;
4520
4521         /* init the queues.  */
4522         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4523                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4524
4525                 if (ring->mqd_obj == NULL) {
4526                         r = amdgpu_bo_create(adev,
4527                                              sizeof(struct vi_mqd),
4528                                              PAGE_SIZE, true,
4529                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4530                                              NULL, &ring->mqd_obj);
4531                         if (r) {
4532                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4533                                 return r;
4534                         }
4535                 }
4536
4537                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4538                 if (unlikely(r != 0)) {
4539                         gfx_v8_0_cp_compute_fini(adev);
4540                         return r;
4541                 }
4542                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4543                                   &mqd_gpu_addr);
4544                 if (r) {
4545                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4546                         gfx_v8_0_cp_compute_fini(adev);
4547                         return r;
4548                 }
4549                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4550                 if (r) {
4551                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4552                         gfx_v8_0_cp_compute_fini(adev);
4553                         return r;
4554                 }
4555
4556                 /* init the mqd struct */
4557                 memset(buf, 0, sizeof(struct vi_mqd));
4558
4559                 mqd = (struct vi_mqd *)buf;
4560                 mqd->header = 0xC0310800;
4561                 mqd->compute_pipelinestat_enable = 0x00000001;
4562                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4563                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4564                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4565                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4566                 mqd->compute_misc_reserved = 0x00000003;
4567
4568                 mutex_lock(&adev->srbm_mutex);
4569                 vi_srbm_select(adev, ring->me,
4570                                ring->pipe,
4571                                ring->queue, 0);
4572
4573                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4574                 eop_gpu_addr >>= 8;
4575
4576                 /* write the EOP addr */
4577                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4578                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4579
4580                 /* set the VMID assigned */
4581                 WREG32(mmCP_HQD_VMID, 0);
4582
4583                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4584                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4585                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4586                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
4587                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4588
4589                 /* disable wptr polling */
4590                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4591                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4592                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4593
4594                 mqd->cp_hqd_eop_base_addr_lo =
4595                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
4596                 mqd->cp_hqd_eop_base_addr_hi =
4597                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4598
4599                 /* enable doorbell? */
4600                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4601                 if (use_doorbell) {
4602                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4603                 } else {
4604                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4605                 }
4606                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4607                 mqd->cp_hqd_pq_doorbell_control = tmp;
4608
4609                 /* disable the queue if it's active */
4610                 mqd->cp_hqd_dequeue_request = 0;
4611                 mqd->cp_hqd_pq_rptr = 0;
4612                 mqd->cp_hqd_pq_wptr= 0;
4613                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4614                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4615                         for (j = 0; j < adev->usec_timeout; j++) {
4616                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4617                                         break;
4618                                 udelay(1);
4619                         }
4620                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4621                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4622                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4623                 }
4624
4625                 /* set the pointer to the MQD */
4626                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4627                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4628                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4629                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4630
4631                 /* set MQD vmid to 0 */
4632                 tmp = RREG32(mmCP_MQD_CONTROL);
4633                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4634                 WREG32(mmCP_MQD_CONTROL, tmp);
4635                 mqd->cp_mqd_control = tmp;
4636
4637                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4638                 hqd_gpu_addr = ring->gpu_addr >> 8;
4639                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4640                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4641                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4642                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4643
4644                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4645                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4646                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4647                                     (order_base_2(ring->ring_size / 4) - 1));
4648                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4649                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4650 #ifdef __BIG_ENDIAN
4651                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4652 #endif
4653                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4654                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4655                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4656                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4657                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4658                 mqd->cp_hqd_pq_control = tmp;
4659
4660                 /* set the wb address wether it's enabled or not */
4661                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4662                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4663                 mqd->cp_hqd_pq_rptr_report_addr_hi =
4664                         upper_32_bits(wb_gpu_addr) & 0xffff;
4665                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4666                        mqd->cp_hqd_pq_rptr_report_addr_lo);
4667                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4668                        mqd->cp_hqd_pq_rptr_report_addr_hi);
4669
4670                 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4671                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4672                 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4673                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4674                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
4675                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4676                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
4677
4678                 /* enable the doorbell if requested */
4679                 if (use_doorbell) {
4680                         if ((adev->asic_type == CHIP_CARRIZO) ||
4681                             (adev->asic_type == CHIP_FIJI) ||
4682                             (adev->asic_type == CHIP_STONEY) ||
4683                             (adev->asic_type == CHIP_POLARIS11) ||
4684                             (adev->asic_type == CHIP_POLARIS10) ||
4685                             (adev->asic_type == CHIP_POLARIS12)) {
4686                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4687                                        AMDGPU_DOORBELL_KIQ << 2);
4688                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4689                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
4690                         }
4691                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4692                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4693                                             DOORBELL_OFFSET, ring->doorbell_index);
4694                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4695                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4696                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4697                         mqd->cp_hqd_pq_doorbell_control = tmp;
4698
4699                 } else {
4700                         mqd->cp_hqd_pq_doorbell_control = 0;
4701                 }
4702                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4703                        mqd->cp_hqd_pq_doorbell_control);
4704
4705                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4706                 ring->wptr = 0;
4707                 mqd->cp_hqd_pq_wptr = ring->wptr;
4708                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4709                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4710
4711                 /* set the vmid for the queue */
4712                 mqd->cp_hqd_vmid = 0;
4713                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4714
4715                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4716                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4717                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4718                 mqd->cp_hqd_persistent_state = tmp;
4719                 if (adev->asic_type == CHIP_STONEY ||
4720                         adev->asic_type == CHIP_POLARIS11 ||
4721                         adev->asic_type == CHIP_POLARIS10 ||
4722                         adev->asic_type == CHIP_POLARIS12) {
4723                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4724                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4725                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4726                 }
4727
4728                 /* activate the queue */
4729                 mqd->cp_hqd_active = 1;
4730                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4731
4732                 vi_srbm_select(adev, 0, 0, 0, 0);
4733                 mutex_unlock(&adev->srbm_mutex);
4734
4735                 amdgpu_bo_kunmap(ring->mqd_obj);
4736                 amdgpu_bo_unreserve(ring->mqd_obj);
4737         }
4738
4739         if (use_doorbell) {
4740                 tmp = RREG32(mmCP_PQ_STATUS);
4741                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4742                 WREG32(mmCP_PQ_STATUS, tmp);
4743         }
4744
4745         gfx_v8_0_cp_compute_enable(adev, true);
4746
4747         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4748                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4749
4750                 ring->ready = true;
4751                 r = amdgpu_ring_test_ring(ring);
4752                 if (r)
4753                         ring->ready = false;
4754         }
4755
4756         return 0;
4757 }
4758
4759 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4760 {
4761         int r;
4762
4763         if (!(adev->flags & AMD_IS_APU))
4764                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4765
4766         if (!adev->pp_enabled) {
4767                 if (!adev->firmware.smu_load) {
4768                         /* legacy firmware loading */
4769                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
4770                         if (r)
4771                                 return r;
4772
4773                         r = gfx_v8_0_cp_compute_load_microcode(adev);
4774                         if (r)
4775                                 return r;
4776                 } else {
4777                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4778                                                         AMDGPU_UCODE_ID_CP_CE);
4779                         if (r)
4780                                 return -EINVAL;
4781
4782                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4783                                                         AMDGPU_UCODE_ID_CP_PFP);
4784                         if (r)
4785                                 return -EINVAL;
4786
4787                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4788                                                         AMDGPU_UCODE_ID_CP_ME);
4789                         if (r)
4790                                 return -EINVAL;
4791
4792                         if (adev->asic_type == CHIP_TOPAZ) {
4793                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4794                                 if (r)
4795                                         return r;
4796                         } else {
4797                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4798                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
4799                                 if (r)
4800                                         return -EINVAL;
4801                         }
4802                 }
4803         }
4804
4805         r = gfx_v8_0_cp_gfx_resume(adev);
4806         if (r)
4807                 return r;
4808
4809         r = gfx_v8_0_cp_compute_resume(adev);
4810         if (r)
4811                 return r;
4812
4813         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4814
4815         return 0;
4816 }
4817
4818 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4819 {
4820         gfx_v8_0_cp_gfx_enable(adev, enable);
4821         gfx_v8_0_cp_compute_enable(adev, enable);
4822 }
4823
/*
 * Hardware init for the gfx block: program golden registers, run the
 * GPU init, then resume the RLC followed by the CP.
 *
 * Returns 0 on success or the error from the RLC/CP resume.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4840
4841 static int gfx_v8_0_hw_fini(void *handle)
4842 {
4843         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4844
4845         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4846         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4847         if (amdgpu_sriov_vf(adev)) {
4848                 pr_debug("For SRIOV client, shouldn't do anything.\n");
4849                 return 0;
4850         }
4851         gfx_v8_0_cp_enable(adev, false);
4852         gfx_v8_0_rlc_stop(adev);
4853         gfx_v8_0_cp_compute_fini(adev);
4854
4855         amdgpu_set_powergating_state(adev,
4856                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
4857
4858         return 0;
4859 }
4860
/* Suspend is implemented as a plain hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4867
/* Resume is implemented as a plain hardware init. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4874
4875 static bool gfx_v8_0_is_idle(void *handle)
4876 {
4877         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4878
4879         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4880                 return false;
4881         else
4882                 return true;
4883 }
4884
4885 static int gfx_v8_0_wait_for_idle(void *handle)
4886 {
4887         unsigned i;
4888         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4889
4890         for (i = 0; i < adev->usec_timeout; i++) {
4891                 if (gfx_v8_0_is_idle(handle))
4892                         return 0;
4893
4894                 udelay(1);
4895         }
4896         return -ETIMEDOUT;
4897 }
4898
4899 static bool gfx_v8_0_check_soft_reset(void *handle)
4900 {
4901         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4902         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4903         u32 tmp;
4904
4905         /* GRBM_STATUS */
4906         tmp = RREG32(mmGRBM_STATUS);
4907         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4908                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4909                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4910                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4911                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4912                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4913                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4914                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4915                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4916                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4917                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4918                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4919                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4920         }
4921
4922         /* GRBM_STATUS2 */
4923         tmp = RREG32(mmGRBM_STATUS2);
4924         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4925                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4926                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4927
4928         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4929             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4930             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4931                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4932                                                 SOFT_RESET_CPF, 1);
4933                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4934                                                 SOFT_RESET_CPC, 1);
4935                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4936                                                 SOFT_RESET_CPG, 1);
4937                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4938                                                 SOFT_RESET_GRBM, 1);
4939         }
4940
4941         /* SRBM_STATUS */
4942         tmp = RREG32(mmSRBM_STATUS);
4943         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4944                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4945                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4946         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
4947                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4948                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
4949
4950         if (grbm_soft_reset || srbm_soft_reset) {
4951                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
4952                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
4953                 return true;
4954         } else {
4955                 adev->gfx.grbm_soft_reset = 0;
4956                 adev->gfx.srbm_soft_reset = 0;
4957                 return false;
4958         }
4959 }
4960
4961 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
4962                                   struct amdgpu_ring *ring)
4963 {
4964         int i;
4965
4966         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4967         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4968                 u32 tmp;
4969                 tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
4970                 tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
4971                                     DEQUEUE_REQ, 2);
4972                 WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
4973                 for (i = 0; i < adev->usec_timeout; i++) {
4974                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4975                                 break;
4976                         udelay(1);
4977                 }
4978         }
4979 }
4980
4981 static int gfx_v8_0_pre_soft_reset(void *handle)
4982 {
4983         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4984         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4985
4986         if ((!adev->gfx.grbm_soft_reset) &&
4987             (!adev->gfx.srbm_soft_reset))
4988                 return 0;
4989
4990         grbm_soft_reset = adev->gfx.grbm_soft_reset;
4991         srbm_soft_reset = adev->gfx.srbm_soft_reset;
4992
4993         /* stop the rlc */
4994         gfx_v8_0_rlc_stop(adev);
4995
4996         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4997             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4998                 /* Disable GFX parsing/prefetching */
4999                 gfx_v8_0_cp_gfx_enable(adev, false);
5000
5001         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5002             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5003             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5004             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5005                 int i;
5006
5007                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5008                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5009
5010                         gfx_v8_0_inactive_hqd(adev, ring);
5011                 }
5012                 /* Disable MEC parsing/prefetching */
5013                 gfx_v8_0_cp_compute_enable(adev, false);
5014         }
5015
5016        return 0;
5017 }
5018
5019 static int gfx_v8_0_soft_reset(void *handle)
5020 {
5021         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5022         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5023         u32 tmp;
5024
5025         if ((!adev->gfx.grbm_soft_reset) &&
5026             (!adev->gfx.srbm_soft_reset))
5027                 return 0;
5028
5029         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5030         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5031
5032         if (grbm_soft_reset || srbm_soft_reset) {
5033                 tmp = RREG32(mmGMCON_DEBUG);
5034                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5035                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5036                 WREG32(mmGMCON_DEBUG, tmp);
5037                 udelay(50);
5038         }
5039
5040         if (grbm_soft_reset) {
5041                 tmp = RREG32(mmGRBM_SOFT_RESET);
5042                 tmp |= grbm_soft_reset;
5043                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5044                 WREG32(mmGRBM_SOFT_RESET, tmp);
5045                 tmp = RREG32(mmGRBM_SOFT_RESET);
5046
5047                 udelay(50);
5048
5049                 tmp &= ~grbm_soft_reset;
5050                 WREG32(mmGRBM_SOFT_RESET, tmp);
5051                 tmp = RREG32(mmGRBM_SOFT_RESET);
5052         }
5053
5054         if (srbm_soft_reset) {
5055                 tmp = RREG32(mmSRBM_SOFT_RESET);
5056                 tmp |= srbm_soft_reset;
5057                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5058                 WREG32(mmSRBM_SOFT_RESET, tmp);
5059                 tmp = RREG32(mmSRBM_SOFT_RESET);
5060
5061                 udelay(50);
5062
5063                 tmp &= ~srbm_soft_reset;
5064                 WREG32(mmSRBM_SOFT_RESET, tmp);
5065                 tmp = RREG32(mmSRBM_SOFT_RESET);
5066         }
5067
5068         if (grbm_soft_reset || srbm_soft_reset) {
5069                 tmp = RREG32(mmGMCON_DEBUG);
5070                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5071                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5072                 WREG32(mmGMCON_DEBUG, tmp);
5073         }
5074
5075         /* Wait a little for things to settle down */
5076         udelay(50);
5077
5078         return 0;
5079 }
5080
5081 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
5082                               struct amdgpu_ring *ring)
5083 {
5084         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5085         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5086         WREG32(mmCP_HQD_PQ_RPTR, 0);
5087         WREG32(mmCP_HQD_PQ_WPTR, 0);
5088         vi_srbm_select(adev, 0, 0, 0, 0);
5089 }
5090
5091 static int gfx_v8_0_post_soft_reset(void *handle)
5092 {
5093         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5094         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5095
5096         if ((!adev->gfx.grbm_soft_reset) &&
5097             (!adev->gfx.srbm_soft_reset))
5098                 return 0;
5099
5100         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5101         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5102
5103         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5104             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5105                 gfx_v8_0_cp_gfx_resume(adev);
5106
5107         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5108             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5109             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5110             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5111                 int i;
5112
5113                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5114                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5115
5116                         gfx_v8_0_init_hqd(adev, ring);
5117                 }
5118                 gfx_v8_0_cp_compute_resume(adev);
5119         }
5120         gfx_v8_0_rlc_start(adev);
5121
5122         return 0;
5123 }
5124
5125 /**
5126  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5127  *
5128  * @adev: amdgpu_device pointer
5129  *
5130  * Fetches a GPU clock counter snapshot.
5131  * Returns the 64 bit clock counter snapshot.
5132  */
5133 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5134 {
5135         uint64_t clock;
5136
5137         mutex_lock(&adev->gfx.gpu_clock_mutex);
5138         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5139         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5140                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5141         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5142         return clock;
5143 }
5144
5145 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5146                                           uint32_t vmid,
5147                                           uint32_t gds_base, uint32_t gds_size,
5148                                           uint32_t gws_base, uint32_t gws_size,
5149                                           uint32_t oa_base, uint32_t oa_size)
5150 {
5151         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5152         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5153
5154         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5155         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5156
5157         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5158         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5159
5160         /* GDS Base */
5161         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5162         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5163                                 WRITE_DATA_DST_SEL(0)));
5164         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5165         amdgpu_ring_write(ring, 0);
5166         amdgpu_ring_write(ring, gds_base);
5167
5168         /* GDS Size */
5169         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5170         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5171                                 WRITE_DATA_DST_SEL(0)));
5172         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5173         amdgpu_ring_write(ring, 0);
5174         amdgpu_ring_write(ring, gds_size);
5175
5176         /* GWS */
5177         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5178         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5179                                 WRITE_DATA_DST_SEL(0)));
5180         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5181         amdgpu_ring_write(ring, 0);
5182         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5183
5184         /* OA */
5185         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5186         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5187                                 WRITE_DATA_DST_SEL(0)));
5188         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5189         amdgpu_ring_write(ring, 0);
5190         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5191 }
5192
5193 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5194 {
5195         WREG32(mmSQ_IND_INDEX,
5196                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5197                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5198                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5199                 (SQ_IND_INDEX__FORCE_READ_MASK));
5200         return RREG32(mmSQ_IND_DATA);
5201 }
5202
5203 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5204                            uint32_t wave, uint32_t thread,
5205                            uint32_t regno, uint32_t num, uint32_t *out)
5206 {
5207         WREG32(mmSQ_IND_INDEX,
5208                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5209                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5210                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5211                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5212                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5213                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5214         while (num--)
5215                 *(out++) = RREG32(mmSQ_IND_DATA);
5216 }
5217
5218 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5219 {
5220         /* type 0 wave data */
5221         dst[(*no_fields)++] = 0;
5222         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5223         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5224         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5225         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5226         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5227         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5228         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5229         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5230         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5231         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5232         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5233         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5234         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5235         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5236         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5237         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5238         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5239         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5240 }
5241
5242 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5243                                      uint32_t wave, uint32_t start,
5244                                      uint32_t size, uint32_t *dst)
5245 {
5246         wave_read_regs(
5247                 adev, simd, wave, 0,
5248                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5249 }
5250
5251
5252 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5253         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5254         .select_se_sh = &gfx_v8_0_select_se_sh,
5255         .read_wave_data = &gfx_v8_0_read_wave_data,
5256         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5257 };
5258
5259 static int gfx_v8_0_early_init(void *handle)
5260 {
5261         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5262
5263         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5264         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5265         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5266         gfx_v8_0_set_ring_funcs(adev);
5267         gfx_v8_0_set_irq_funcs(adev);
5268         gfx_v8_0_set_gds_init(adev);
5269         gfx_v8_0_set_rlc_funcs(adev);
5270
5271         return 0;
5272 }
5273
5274 static int gfx_v8_0_late_init(void *handle)
5275 {
5276         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5277         int r;
5278
5279         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5280         if (r)
5281                 return r;
5282
5283         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5284         if (r)
5285                 return r;
5286
5287         /* requires IBs so do in late init after IB pool is initialized */
5288         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5289         if (r)
5290                 return r;
5291
5292         amdgpu_set_powergating_state(adev,
5293                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5294
5295         return 0;
5296 }
5297
5298 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5299                                                        bool enable)
5300 {
5301         if ((adev->asic_type == CHIP_POLARIS11) ||
5302             (adev->asic_type == CHIP_POLARIS12))
5303                 /* Send msg to SMU via Powerplay */
5304                 amdgpu_set_powergating_state(adev,
5305                                              AMD_IP_BLOCK_TYPE_SMC,
5306                                              enable ?
5307                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5308
5309         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5310 }
5311
5312 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5313                                                         bool enable)
5314 {
5315         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5316 }
5317
5318 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5319                 bool enable)
5320 {
5321         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5322 }
5323
5324 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5325                                           bool enable)
5326 {
5327         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5328 }
5329
5330 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5331                                                 bool enable)
5332 {
5333         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5334
5335         /* Read any GFX register to wake up GFX. */
5336         if (!enable)
5337                 RREG32(mmDB_RENDER_CONTROL);
5338 }
5339
5340 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5341                                           bool enable)
5342 {
5343         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5344                 cz_enable_gfx_cg_power_gating(adev, true);
5345                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5346                         cz_enable_gfx_pipeline_power_gating(adev, true);
5347         } else {
5348                 cz_enable_gfx_cg_power_gating(adev, false);
5349                 cz_enable_gfx_pipeline_power_gating(adev, false);
5350         }
5351 }
5352
5353 static int gfx_v8_0_set_powergating_state(void *handle,
5354                                           enum amd_powergating_state state)
5355 {
5356         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5357         bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5358
5359         switch (adev->asic_type) {
5360         case CHIP_CARRIZO:
5361         case CHIP_STONEY:
5362
5363                 cz_update_gfx_cg_power_gating(adev, enable);
5364
5365                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5366                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5367                 else
5368                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5369
5370                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5371                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5372                 else
5373                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5374                 break;
5375         case CHIP_POLARIS11:
5376         case CHIP_POLARIS12:
5377                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5378                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5379                 else
5380                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5381
5382                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5383                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5384                 else
5385                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5386
5387                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5388                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5389                 else
5390                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5391                 break;
5392         default:
5393                 break;
5394         }
5395
5396         return 0;
5397 }
5398
5399 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5400                                      uint32_t reg_addr, uint32_t cmd)
5401 {
5402         uint32_t data;
5403
5404         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5405
5406         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5407         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5408
5409         data = RREG32(mmRLC_SERDES_WR_CTRL);
5410         if (adev->asic_type == CHIP_STONEY)
5411                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5412                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5413                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5414                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5415                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5416                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5417                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5418                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5419                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5420         else
5421                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5422                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5423                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5424                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5425                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5426                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5427                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5428                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5429                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5430                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5431                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5432         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5433                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5434                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5435                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5436
5437         WREG32(mmRLC_SERDES_WR_CTRL, data);
5438 }
5439
/* Message codes placed in the MESSAGE field of RLC_GPR_REG2. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
/*
 * Locally defined field layout of RLC_GPR_REG2: REQ is bit 0 and
 * MESSAGE covers bits 1-4.
 */
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5446
5447 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5448 {
5449         u32 data = 0;
5450         unsigned i;
5451
5452         data = RREG32(mmRLC_CNTL);
5453         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5454                 return;
5455
5456         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5457             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5458                                AMD_PG_SUPPORT_GFX_DMG))) {
5459                 data |= RLC_GPR_REG2__REQ_MASK;
5460                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5461                 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5462                 WREG32(mmRLC_GPR_REG2, data);
5463
5464                 for (i = 0; i < adev->usec_timeout; i++) {
5465                         if ((RREG32(mmRLC_GPM_STAT) &
5466                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5467                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5468                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5469                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5470                                 break;
5471                         udelay(1);
5472                 }
5473
5474                 for (i = 0; i < adev->usec_timeout; i++) {
5475                         if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5476                                 break;
5477                         udelay(1);
5478                 }
5479                 adev->gfx.rlc.in_safe_mode = true;
5480         }
5481 }
5482
5483 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5484 {
5485         u32 data;
5486         unsigned i;
5487
5488         data = RREG32(mmRLC_CNTL);
5489         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5490                 return;
5491
5492         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5493             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5494                                AMD_PG_SUPPORT_GFX_DMG))) {
5495                 data |= RLC_GPR_REG2__REQ_MASK;
5496                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5497                 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5498                 WREG32(mmRLC_GPR_REG2, data);
5499                 adev->gfx.rlc.in_safe_mode = false;
5500         }
5501
5502         for (i = 0; i < adev->usec_timeout; i++) {
5503                 if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5504                         break;
5505                 udelay(1);
5506         }
5507 }
5508
5509 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5510 {
5511         u32 data;
5512         unsigned i;
5513
5514         data = RREG32(mmRLC_CNTL);
5515         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5516                 return;
5517
5518         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5519                 data |= RLC_SAFE_MODE__CMD_MASK;
5520                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5521                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5522                 WREG32(mmRLC_SAFE_MODE, data);
5523
5524                 for (i = 0; i < adev->usec_timeout; i++) {
5525                         if ((RREG32(mmRLC_GPM_STAT) &
5526                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5527                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5528                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5529                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5530                                 break;
5531                         udelay(1);
5532                 }
5533
5534                 for (i = 0; i < adev->usec_timeout; i++) {
5535                         if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5536                                 break;
5537                         udelay(1);
5538                 }
5539                 adev->gfx.rlc.in_safe_mode = true;
5540         }
5541 }
5542
5543 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5544 {
5545         u32 data = 0;
5546         unsigned i;
5547
5548         data = RREG32(mmRLC_CNTL);
5549         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5550                 return;
5551
5552         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5553                 if (adev->gfx.rlc.in_safe_mode) {
5554                         data |= RLC_SAFE_MODE__CMD_MASK;
5555                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5556                         WREG32(mmRLC_SAFE_MODE, data);
5557                         adev->gfx.rlc.in_safe_mode = false;
5558                 }
5559         }
5560
5561         for (i = 0; i < adev->usec_timeout; i++) {
5562                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5563                         break;
5564                 udelay(1);
5565         }
5566 }
5567
/*
 * enter_safe_mode implementation that does not touch the hardware: it only
 * records in the driver state that safe mode has been entered.
 */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
        adev->gfx.rlc.in_safe_mode = true;
}
5572
/*
 * exit_safe_mode implementation that does not touch the hardware: it only
 * records in the driver state that safe mode has been left.
 */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
        adev->gfx.rlc.in_safe_mode = false;
}
5577
5578 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5579         .enter_safe_mode = cz_enter_rlc_safe_mode,
5580         .exit_safe_mode = cz_exit_rlc_safe_mode
5581 };
5582
5583 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5584         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5585         .exit_safe_mode = iceland_exit_rlc_safe_mode
5586 };
5587
5588 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5589         .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5590         .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5591 };
5592
/*
 * Enable or disable medium grain clock gating (MGCG) together with the
 * related memory light sleep (MGLS) and CGTS settings.
 *
 * @adev:   device to program
 * @enable: true to enable; the enable path is only taken when
 *          AMD_CG_SUPPORT_GFX_MGCG is set in adev->cg_flags, otherwise the
 *          disable path runs.
 *
 * The whole sequence is bracketed by the RLC enter/exit safe-mode
 * callbacks.  Registers are only rewritten when their value changes.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, data;

        adev->gfx.rlc.funcs->enter_safe_mode(adev);

        /* It is disabled by HW by default */
        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
                                /* 1 - RLC memory light sleep */
                                WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

                        if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
                                /* 2 - CP memory light sleep */
                                WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
                }

                /*
                 * 3 - RLC_CGTT_MGCG_OVERRIDE: clear the CPF, RLC and MGCG
                 * override bits; the GRBM override bit is additionally
                 * cleared on non-APU parts.
                 */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                if (adev->flags & AMD_IS_APU)
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
                else
                        data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 5 - clear mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
                        /* 6 - Enable CGTS (Tree Shade) MGCG / MGLS */
                        temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                        data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
                        data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
                        data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
                        data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
                        /* the LS override is only dropped when both MGLS and CGTS_LS are supported */
                        if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
                            (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
                        data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
                        data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
                        if (temp != data)
                                WREG32(mmCGTS_SM_CTRL_REG, data);
                }
                udelay(50);

                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        } else {
                /*
                 * 1 - set the CPF, RLC, MGCG and GRBM override bits in
                 * RLC_CGTT_MGCG_OVERRIDE
                 */
                temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

                /* 2 - disable MGLS in RLC */
                data = RREG32(mmRLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
                        data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
                        WREG32(mmRLC_MEM_SLP_CNTL, data);
                }

                /* 3 - disable MGLS in CP */
                data = RREG32(mmCP_MEM_SLP_CNTL);
                if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
                        data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
                        WREG32(mmCP_MEM_SLP_CNTL, data);
                }

                /* 4 - Disable CGTS (Tree Shade) MGCG and MGLS */
                temp = data = RREG32(mmCGTS_SM_CTRL_REG);
                data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
                                CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
                if (temp != data)
                        WREG32(mmCGTS_SM_CTRL_REG, data);

                /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 6 - set mgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                udelay(50);

                /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);
        }

        adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5696
/*
 * Enable or disable coarse grain clock gating (CGCG) and, when supported,
 * coarse grain light sleep (CGLS).
 *
 * @adev:   device to program
 * @enable: true to enable; the enable path is only taken when
 *          AMD_CG_SUPPORT_GFX_CGCG is set in adev->cg_flags, otherwise the
 *          disable path runs.
 *
 * mmRLC_CGCG_CGLS_CTRL is sampled before safe mode is entered; the rest of
 * the sequence is bracketed by the RLC enter/exit safe-mode callbacks.
 * Registers are only rewritten when their value changes.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
                                                      bool enable)
{
        uint32_t temp, temp1, data, data1;

        temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

        adev->gfx.rlc.funcs->enter_safe_mode(adev);

        if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
                /* 1 - clear the CGCG bit in RLC_CGTT_MGCG_OVERRIDE */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 2 - clear cgcg override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* 3 - write cmd to set CGLS */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

                /* 4 - enable cgcg */
                data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

                if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
                        /* enable cgls */
                        data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

                        /* and clear the CGLS bit in RLC_CGTT_MGCG_OVERRIDE */
                        temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                        data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

                        if (temp1 != data1)
                                WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
                } else {
                        data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
                }

                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);

                /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
                 * Cmp_busy/GFX_Idle interrupts
                 */
                gfx_v8_0_enable_gui_idle_interrupt(adev, true);
        } else {
                /* disable cntx_empty_int_enable & GFX Idle interrupt */
                gfx_v8_0_enable_gui_idle_interrupt(adev, false);

                /* set the CGCG and CGLS bits in RLC_CGTT_MGCG_OVERRIDE */
                temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
                data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
                                RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
                if (temp1 != data1)
                        WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

                /* read gfx register to wake up cgcg */
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);
                RREG32(mmCB_CGTT_SCLK_CTRL);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to set CGCG override */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

                /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
                gfx_v8_0_wait_for_rlc_serdes(adev);

                /* write cmd to clear CGLS */
                gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

                /* disable cgcg, cgls should be disabled too. */
                data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
                          RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
                if (temp != data)
                        WREG32(mmRLC_CGCG_CGLS_CTRL, data);
        }

        /* final serdes idle wait before leaving safe mode */
        gfx_v8_0_wait_for_rlc_serdes(adev);

        adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5787 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5788                                             bool enable)
5789 {
5790         if (enable) {
5791                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5792                  * ===  MGCG + MGLS + TS(CG/LS) ===
5793                  */
5794                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5795                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5796         } else {
5797                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5798                  * ===  CGCG + CGLS ===
5799                  */
5800                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5801                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5802         }
5803         return 0;
5804 }
5805
5806 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5807                                           enum amd_clockgating_state state)
5808 {
5809         uint32_t msg_id, pp_state = 0;
5810         uint32_t pp_support_state = 0;
5811         void *pp_handle = adev->powerplay.pp_handle;
5812
5813         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5814                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5815                         pp_support_state = PP_STATE_SUPPORT_LS;
5816                         pp_state = PP_STATE_LS;
5817                 }
5818                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5819                         pp_support_state |= PP_STATE_SUPPORT_CG;
5820                         pp_state |= PP_STATE_CG;
5821                 }
5822                 if (state == AMD_CG_STATE_UNGATE)
5823                         pp_state = 0;
5824
5825                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5826                                 PP_BLOCK_GFX_CG,
5827                                 pp_support_state,
5828                                 pp_state);
5829                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5830         }
5831
5832         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5833                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5834                         pp_support_state = PP_STATE_SUPPORT_LS;
5835                         pp_state = PP_STATE_LS;
5836                 }
5837
5838                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5839                         pp_support_state |= PP_STATE_SUPPORT_CG;
5840                         pp_state |= PP_STATE_CG;
5841                 }
5842
5843                 if (state == AMD_CG_STATE_UNGATE)
5844                         pp_state = 0;
5845
5846                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5847                                 PP_BLOCK_GFX_MG,
5848                                 pp_support_state,
5849                                 pp_state);
5850                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5851         }
5852
5853         return 0;
5854 }
5855
5856 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5857                                           enum amd_clockgating_state state)
5858 {
5859
5860         uint32_t msg_id, pp_state = 0;
5861         uint32_t pp_support_state = 0;
5862         void *pp_handle = adev->powerplay.pp_handle;
5863
5864         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5865                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5866                         pp_support_state = PP_STATE_SUPPORT_LS;
5867                         pp_state = PP_STATE_LS;
5868                 }
5869                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5870                         pp_support_state |= PP_STATE_SUPPORT_CG;
5871                         pp_state |= PP_STATE_CG;
5872                 }
5873                 if (state == AMD_CG_STATE_UNGATE)
5874                         pp_state = 0;
5875
5876                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5877                                 PP_BLOCK_GFX_CG,
5878                                 pp_support_state,
5879                                 pp_state);
5880                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5881         }
5882
5883         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5884                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5885                         pp_support_state = PP_STATE_SUPPORT_LS;
5886                         pp_state = PP_STATE_LS;
5887                 }
5888                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5889                         pp_support_state |= PP_STATE_SUPPORT_CG;
5890                         pp_state |= PP_STATE_CG;
5891                 }
5892                 if (state == AMD_CG_STATE_UNGATE)
5893                         pp_state = 0;
5894
5895                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5896                                 PP_BLOCK_GFX_3D,
5897                                 pp_support_state,
5898                                 pp_state);
5899                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5900         }
5901
5902         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5903                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5904                         pp_support_state = PP_STATE_SUPPORT_LS;
5905                         pp_state = PP_STATE_LS;
5906                 }
5907
5908                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5909                         pp_support_state |= PP_STATE_SUPPORT_CG;
5910                         pp_state |= PP_STATE_CG;
5911                 }
5912
5913                 if (state == AMD_CG_STATE_UNGATE)
5914                         pp_state = 0;
5915
5916                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5917                                 PP_BLOCK_GFX_MG,
5918                                 pp_support_state,
5919                                 pp_state);
5920                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5921         }
5922
5923         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5924                 pp_support_state = PP_STATE_SUPPORT_LS;
5925
5926                 if (state == AMD_CG_STATE_UNGATE)
5927                         pp_state = 0;
5928                 else
5929                         pp_state = PP_STATE_LS;
5930
5931                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5932                                 PP_BLOCK_GFX_RLC,
5933                                 pp_support_state,
5934                                 pp_state);
5935                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5936         }
5937
5938         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5939                 pp_support_state = PP_STATE_SUPPORT_LS;
5940
5941                 if (state == AMD_CG_STATE_UNGATE)
5942                         pp_state = 0;
5943                 else
5944                         pp_state = PP_STATE_LS;
5945                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5946                         PP_BLOCK_GFX_CP,
5947                         pp_support_state,
5948                         pp_state);
5949                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5950         }
5951
5952         return 0;
5953 }
5954
5955 static int gfx_v8_0_set_clockgating_state(void *handle,
5956                                           enum amd_clockgating_state state)
5957 {
5958         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5959
5960         switch (adev->asic_type) {
5961         case CHIP_FIJI:
5962         case CHIP_CARRIZO:
5963         case CHIP_STONEY:
5964                 gfx_v8_0_update_gfx_clock_gating(adev,
5965                                                  state == AMD_CG_STATE_GATE ? true : false);
5966                 break;
5967         case CHIP_TONGA:
5968                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5969                 break;
5970         case CHIP_POLARIS10:
5971         case CHIP_POLARIS11:
5972                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5973                 break;
5974         default:
5975                 break;
5976         }
5977         return 0;
5978 }
5979
5980 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
5981 {
5982         return ring->adev->wb.wb[ring->rptr_offs];
5983 }
5984
5985 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5986 {
5987         struct amdgpu_device *adev = ring->adev;
5988
5989         if (ring->use_doorbell)
5990                 /* XXX check if swapping is necessary on BE */
5991                 return ring->adev->wb.wb[ring->wptr_offs];
5992         else
5993                 return RREG32(mmCP_RB0_WPTR);
5994 }
5995
5996 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5997 {
5998         struct amdgpu_device *adev = ring->adev;
5999
6000         if (ring->use_doorbell) {
6001                 /* XXX check if swapping is necessary on BE */
6002                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6003                 WDOORBELL32(ring->doorbell_index, ring->wptr);
6004         } else {
6005                 WREG32(mmCP_RB0_WPTR, ring->wptr);
6006                 (void)RREG32(mmCP_RB0_WPTR);
6007         }
6008 }
6009
6010 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6011 {
6012         u32 ref_and_mask, reg_mem_engine;
6013
6014         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
6015                 switch (ring->me) {
6016                 case 1:
6017                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6018                         break;
6019                 case 2:
6020                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6021                         break;
6022                 default:
6023                         return;
6024                 }
6025                 reg_mem_engine = 0;
6026         } else {
6027                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6028                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6029         }
6030
6031         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6032         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6033                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6034                                  reg_mem_engine));
6035         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6036         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6037         amdgpu_ring_write(ring, ref_and_mask);
6038         amdgpu_ring_write(ring, ref_and_mask);
6039         amdgpu_ring_write(ring, 0x20); /* poll interval */
6040 }
6041
6042 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6043 {
6044         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6045         amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6046                 EVENT_INDEX(4));
6047
6048         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6049         amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6050                 EVENT_INDEX(0));
6051 }
6052
6053
6054 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6055 {
6056         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6057         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6058                                  WRITE_DATA_DST_SEL(0) |
6059                                  WR_CONFIRM));
6060         amdgpu_ring_write(ring, mmHDP_DEBUG0);
6061         amdgpu_ring_write(ring, 0);
6062         amdgpu_ring_write(ring, 1);
6063
6064 }
6065
6066 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6067                                       struct amdgpu_ib *ib,
6068                                       unsigned vm_id, bool ctx_switch)
6069 {
6070         u32 header, control = 0;
6071
6072         if (ib->flags & AMDGPU_IB_FLAG_CE)
6073                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6074         else
6075                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6076
6077         control |= ib->length_dw | (vm_id << 24);
6078
6079         amdgpu_ring_write(ring, header);
6080         amdgpu_ring_write(ring,
6081 #ifdef __BIG_ENDIAN
6082                           (2 << 0) |
6083 #endif
6084                           (ib->gpu_addr & 0xFFFFFFFC));
6085         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6086         amdgpu_ring_write(ring, control);
6087 }
6088
6089 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6090                                           struct amdgpu_ib *ib,
6091                                           unsigned vm_id, bool ctx_switch)
6092 {
6093         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6094
6095         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6096         amdgpu_ring_write(ring,
6097 #ifdef __BIG_ENDIAN
6098                                 (2 << 0) |
6099 #endif
6100                                 (ib->gpu_addr & 0xFFFFFFFC));
6101         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6102         amdgpu_ring_write(ring, control);
6103 }
6104
6105 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6106                                          u64 seq, unsigned flags)
6107 {
6108         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6109         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6110
6111         /* EVENT_WRITE_EOP - flush caches, send int */
6112         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6113         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6114                                  EOP_TC_ACTION_EN |
6115                                  EOP_TC_WB_ACTION_EN |
6116                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6117                                  EVENT_INDEX(5)));
6118         amdgpu_ring_write(ring, addr & 0xfffffffc);
6119         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6120                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6121         amdgpu_ring_write(ring, lower_32_bits(seq));
6122         amdgpu_ring_write(ring, upper_32_bits(seq));
6123
6124 }
6125
6126 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6127 {
6128         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6129         uint32_t seq = ring->fence_drv.sync_seq;
6130         uint64_t addr = ring->fence_drv.gpu_addr;
6131
6132         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6133         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6134                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6135                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6136         amdgpu_ring_write(ring, addr & 0xfffffffc);
6137         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6138         amdgpu_ring_write(ring, seq);
6139         amdgpu_ring_write(ring, 0xffffffff);
6140         amdgpu_ring_write(ring, 4); /* poll interval */
6141 }
6142
6143 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6144                                         unsigned vm_id, uint64_t pd_addr)
6145 {
6146         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6147
6148         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6149         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6150                                  WRITE_DATA_DST_SEL(0)) |
6151                                  WR_CONFIRM);
6152         if (vm_id < 8) {
6153                 amdgpu_ring_write(ring,
6154                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6155         } else {
6156                 amdgpu_ring_write(ring,
6157                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6158         }
6159         amdgpu_ring_write(ring, 0);
6160         amdgpu_ring_write(ring, pd_addr >> 12);
6161
6162         /* bits 0-15 are the VM contexts0-15 */
6163         /* invalidate the cache */
6164         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6165         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6166                                  WRITE_DATA_DST_SEL(0)));
6167         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6168         amdgpu_ring_write(ring, 0);
6169         amdgpu_ring_write(ring, 1 << vm_id);
6170
6171         /* wait for the invalidate to complete */
6172         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6173         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6174                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6175                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6176         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6177         amdgpu_ring_write(ring, 0);
6178         amdgpu_ring_write(ring, 0); /* ref */
6179         amdgpu_ring_write(ring, 0); /* mask */
6180         amdgpu_ring_write(ring, 0x20); /* poll interval */
6181
6182         /* compute doesn't have PFP */
6183         if (usepfp) {
6184                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6185                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6186                 amdgpu_ring_write(ring, 0x0);
6187                 /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
6188                 amdgpu_ring_insert_nop(ring, 128);
6189         }
6190 }
6191
6192 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6193 {
6194         return ring->adev->wb.wb[ring->wptr_offs];
6195 }
6196
6197 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6198 {
6199         struct amdgpu_device *adev = ring->adev;
6200
6201         /* XXX check if swapping is necessary on BE */
6202         adev->wb.wb[ring->wptr_offs] = ring->wptr;
6203         WDOORBELL32(ring->doorbell_index, ring->wptr);
6204 }
6205
6206 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6207                                              u64 addr, u64 seq,
6208                                              unsigned flags)
6209 {
6210         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6211         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6212
6213         /* RELEASE_MEM - flush caches, send int */
6214         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6215         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6216                                  EOP_TC_ACTION_EN |
6217                                  EOP_TC_WB_ACTION_EN |
6218                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6219                                  EVENT_INDEX(5)));
6220         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6221         amdgpu_ring_write(ring, addr & 0xfffffffc);
6222         amdgpu_ring_write(ring, upper_32_bits(addr));
6223         amdgpu_ring_write(ring, lower_32_bits(seq));
6224         amdgpu_ring_write(ring, upper_32_bits(seq));
6225 }
6226
6227 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6228 {
6229         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6230         amdgpu_ring_write(ring, 0);
6231 }
6232
6233 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6234 {
6235         uint32_t dw2 = 0;
6236
6237         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6238         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6239                 gfx_v8_0_ring_emit_vgt_flush(ring);
6240                 /* set load_global_config & load_global_uconfig */
6241                 dw2 |= 0x8001;
6242                 /* set load_cs_sh_regs */
6243                 dw2 |= 0x01000000;
6244                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6245                 dw2 |= 0x10002;
6246
6247                 /* set load_ce_ram if preamble presented */
6248                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6249                         dw2 |= 0x10000000;
6250         } else {
6251                 /* still load_ce_ram if this is the first time preamble presented
6252                  * although there is no context switch happens.
6253                  */
6254                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6255                         dw2 |= 0x10000000;
6256         }
6257
6258         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6259         amdgpu_ring_write(ring, dw2);
6260         amdgpu_ring_write(ring, 0);
6261 }
6262
6263 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6264                                                  enum amdgpu_interrupt_state state)
6265 {
6266         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6267                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6268 }
6269
6270 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6271                                                      int me, int pipe,
6272                                                      enum amdgpu_interrupt_state state)
6273 {
6274         /*
6275          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6276          * handles the setting of interrupts for this specific pipe. All other
6277          * pipes' interrupts are set by amdkfd.
6278          */
6279
6280         if (me == 1) {
6281                 switch (pipe) {
6282                 case 0:
6283                         break;
6284                 default:
6285                         DRM_DEBUG("invalid pipe %d\n", pipe);
6286                         return;
6287                 }
6288         } else {
6289                 DRM_DEBUG("invalid me %d\n", me);
6290                 return;
6291         }
6292
6293         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6294                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6295 }
6296
6297 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6298                                              struct amdgpu_irq_src *source,
6299                                              unsigned type,
6300                                              enum amdgpu_interrupt_state state)
6301 {
6302         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6303                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6304
6305         return 0;
6306 }
6307
6308 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6309                                               struct amdgpu_irq_src *source,
6310                                               unsigned type,
6311                                               enum amdgpu_interrupt_state state)
6312 {
6313         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6314                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6315
6316         return 0;
6317 }
6318
6319 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6320                                             struct amdgpu_irq_src *src,
6321                                             unsigned type,
6322                                             enum amdgpu_interrupt_state state)
6323 {
6324         switch (type) {
6325         case AMDGPU_CP_IRQ_GFX_EOP:
6326                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6327                 break;
6328         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6329                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6330                 break;
6331         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6332                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6333                 break;
6334         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6335                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6336                 break;
6337         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6338                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6339                 break;
6340         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6341                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6342                 break;
6343         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6344                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6345                 break;
6346         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6347                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6348                 break;
6349         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6350                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6351                 break;
6352         default:
6353                 break;
6354         }
6355         return 0;
6356 }
6357
6358 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6359                             struct amdgpu_irq_src *source,
6360                             struct amdgpu_iv_entry *entry)
6361 {
6362         int i;
6363         u8 me_id, pipe_id, queue_id;
6364         struct amdgpu_ring *ring;
6365
6366         DRM_DEBUG("IH: CP EOP\n");
6367         me_id = (entry->ring_id & 0x0c) >> 2;
6368         pipe_id = (entry->ring_id & 0x03) >> 0;
6369         queue_id = (entry->ring_id & 0x70) >> 4;
6370
6371         switch (me_id) {
6372         case 0:
6373                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6374                 break;
6375         case 1:
6376         case 2:
6377                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6378                         ring = &adev->gfx.compute_ring[i];
6379                         /* Per-queue interrupt is supported for MEC starting from VI.
6380                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6381                           */
6382                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6383                                 amdgpu_fence_process(ring);
6384                 }
6385                 break;
6386         }
6387         return 0;
6388 }
6389
6390 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6391                                  struct amdgpu_irq_src *source,
6392                                  struct amdgpu_iv_entry *entry)
6393 {
6394         DRM_ERROR("Illegal register access in command stream\n");
6395         schedule_work(&adev->reset_work);
6396         return 0;
6397 }
6398
6399 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6400                                   struct amdgpu_irq_src *source,
6401                                   struct amdgpu_iv_entry *entry)
6402 {
6403         DRM_ERROR("Illegal instruction in command stream\n");
6404         schedule_work(&adev->reset_work);
6405         return 0;
6406 }
6407
6408 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6409         .name = "gfx_v8_0",
6410         .early_init = gfx_v8_0_early_init,
6411         .late_init = gfx_v8_0_late_init,
6412         .sw_init = gfx_v8_0_sw_init,
6413         .sw_fini = gfx_v8_0_sw_fini,
6414         .hw_init = gfx_v8_0_hw_init,
6415         .hw_fini = gfx_v8_0_hw_fini,
6416         .suspend = gfx_v8_0_suspend,
6417         .resume = gfx_v8_0_resume,
6418         .is_idle = gfx_v8_0_is_idle,
6419         .wait_for_idle = gfx_v8_0_wait_for_idle,
6420         .check_soft_reset = gfx_v8_0_check_soft_reset,
6421         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6422         .soft_reset = gfx_v8_0_soft_reset,
6423         .post_soft_reset = gfx_v8_0_post_soft_reset,
6424         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6425         .set_powergating_state = gfx_v8_0_set_powergating_state,
6426 };
6427
6428 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6429         .type = AMDGPU_RING_TYPE_GFX,
6430         .align_mask = 0xff,
6431         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6432         .get_rptr = gfx_v8_0_ring_get_rptr,
6433         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6434         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6435         .emit_frame_size =
6436                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6437                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6438                 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6439                 6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
6440                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6441                 128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
6442                 2 + /* gfx_v8_ring_emit_sb */
6443                 3 + 4, /* gfx_v8_ring_emit_cntxcntl including vgt flush */
6444         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6445         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6446         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6447         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6448         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6449         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6450         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6451         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6452         .test_ring = gfx_v8_0_ring_test_ring,
6453         .test_ib = gfx_v8_0_ring_test_ib,
6454         .insert_nop = amdgpu_ring_insert_nop,
6455         .pad_ib = amdgpu_ring_generic_pad_ib,
6456         .emit_switch_buffer = gfx_v8_ring_emit_sb,
6457         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6458 };
6459
6460 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6461         .type = AMDGPU_RING_TYPE_COMPUTE,
6462         .align_mask = 0xff,
6463         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6464         .get_rptr = gfx_v8_0_ring_get_rptr,
6465         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6466         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6467         .emit_frame_size =
6468                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6469                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6470                 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6471                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6472                 17 + /* gfx_v8_0_ring_emit_vm_flush */
6473                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6474         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6475         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6476         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6477         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6478         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6479         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6480         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6481         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6482         .test_ring = gfx_v8_0_ring_test_ring,
6483         .test_ib = gfx_v8_0_ring_test_ib,
6484         .insert_nop = amdgpu_ring_insert_nop,
6485         .pad_ib = amdgpu_ring_generic_pad_ib,
6486 };
6487
6488 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6489 {
6490         int i;
6491
6492         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6493                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6494
6495         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6496                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6497 }
6498
6499 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6500         .set = gfx_v8_0_set_eop_interrupt_state,
6501         .process = gfx_v8_0_eop_irq,
6502 };
6503
6504 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6505         .set = gfx_v8_0_set_priv_reg_fault_state,
6506         .process = gfx_v8_0_priv_reg_irq,
6507 };
6508
6509 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6510         .set = gfx_v8_0_set_priv_inst_fault_state,
6511         .process = gfx_v8_0_priv_inst_irq,
6512 };
6513
6514 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6515 {
6516         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6517         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6518
6519         adev->gfx.priv_reg_irq.num_types = 1;
6520         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6521
6522         adev->gfx.priv_inst_irq.num_types = 1;
6523         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6524 }
6525
6526 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6527 {
6528         switch (adev->asic_type) {
6529         case CHIP_TOPAZ:
6530                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6531                 break;
6532         case CHIP_STONEY:
6533         case CHIP_CARRIZO:
6534                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6535                 break;
6536         default:
6537                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6538                 break;
6539         }
6540 }
6541
6542 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6543 {
6544         /* init asci gds info */
6545         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6546         adev->gds.gws.total_size = 64;
6547         adev->gds.oa.total_size = 16;
6548
6549         if (adev->gds.mem.total_size == 64 * 1024) {
6550                 adev->gds.mem.gfx_partition_size = 4096;
6551                 adev->gds.mem.cs_partition_size = 4096;
6552
6553                 adev->gds.gws.gfx_partition_size = 4;
6554                 adev->gds.gws.cs_partition_size = 4;
6555
6556                 adev->gds.oa.gfx_partition_size = 4;
6557                 adev->gds.oa.cs_partition_size = 1;
6558         } else {
6559                 adev->gds.mem.gfx_partition_size = 1024;
6560                 adev->gds.mem.cs_partition_size = 1024;
6561
6562                 adev->gds.gws.gfx_partition_size = 16;
6563                 adev->gds.gws.cs_partition_size = 16;
6564
6565                 adev->gds.oa.gfx_partition_size = 4;
6566                 adev->gds.oa.cs_partition_size = 4;
6567         }
6568 }
6569
6570 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6571                                                  u32 bitmap)
6572 {
6573         u32 data;
6574
6575         if (!bitmap)
6576                 return;
6577
6578         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6579         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6580
6581         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6582 }
6583
6584 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6585 {
6586         u32 data, mask;
6587
6588         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6589                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6590
6591         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6592
6593         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6594 }
6595
6596 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6597 {
6598         int i, j, k, counter, active_cu_number = 0;
6599         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6600         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6601         unsigned disable_masks[4 * 2];
6602
6603         memset(cu_info, 0, sizeof(*cu_info));
6604
6605         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6606
6607         mutex_lock(&adev->grbm_idx_mutex);
6608         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6609                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6610                         mask = 1;
6611                         ao_bitmap = 0;
6612                         counter = 0;
6613                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6614                         if (i < 4 && j < 2)
6615                                 gfx_v8_0_set_user_cu_inactive_bitmap(
6616                                         adev, disable_masks[i * 2 + j]);
6617                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6618                         cu_info->bitmap[i][j] = bitmap;
6619
6620                         for (k = 0; k < 16; k ++) {
6621                                 if (bitmap & mask) {
6622                                         if (counter < 2)
6623                                                 ao_bitmap |= mask;
6624                                         counter ++;
6625                                 }
6626                                 mask <<= 1;
6627                         }
6628                         active_cu_number += counter;
6629                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6630                 }
6631         }
6632         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6633         mutex_unlock(&adev->grbm_idx_mutex);
6634
6635         cu_info->number = active_cu_number;
6636         cu_info->ao_cu_mask = ao_cu_mask;
6637 }
6638
6639 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
6640 {
6641         .type = AMD_IP_BLOCK_TYPE_GFX,
6642         .major = 8,
6643         .minor = 0,
6644         .rev = 0,
6645         .funcs = &gfx_v8_0_ip_funcs,
6646 };
6647
6648 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
6649 {
6650         .type = AMD_IP_BLOCK_TYPE_GFX,
6651         .major = 8,
6652         .minor = 1,
6653         .rev = 0,
6654         .funcs = &gfx_v8_0_ip_funcs,
6655 };