drm/amdgpu: add GFX support for ELM/BAF
[linux-block.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "clearstate_vi.h"
32
33 #include "gmc/gmc_8_2_d.h"
34 #include "gmc/gmc_8_2_sh_mask.h"
35
36 #include "oss/oss_3_0_d.h"
37 #include "oss/oss_3_0_sh_mask.h"
38
39 #include "bif/bif_5_0_d.h"
40 #include "bif/bif_5_0_sh_mask.h"
41
42 #include "gca/gfx_8_0_d.h"
43 #include "gca/gfx_8_0_enum.h"
44 #include "gca/gfx_8_0_sh_mask.h"
45 #include "gca/gfx_8_0_enum.h"
46
47 #include "dce/dce_10_0_d.h"
48 #include "dce/dce_10_0_sh_mask.h"
49
/*
 * Ring counts for the GFX8 block.  Going by the macro names these are one
 * graphics ring and eight compute rings; the code that consumes them is
 * outside this part of the file.
 */
50 #define GFX8_NUM_GFX_RINGS     1
51 #define GFX8_NUM_COMPUTE_RINGS 8
52
/*
 * Per-ASIC GB_ADDR_CONFIG constants.  Each value equals the mmGB_ADDR_CONFIG
 * row of the corresponding *_golden_common_all table further down in this
 * file (Topaz matches the iceland table, Carrizo matches the cz table).
 */
53 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
54 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
55 #define BAFFIN_GB_ADDR_CONFIG_GOLDEN 0x22011002
56 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
57
/*
 * Field-placement helpers: each macro shifts its argument left by the
 * __SHIFT constant of the named field.  The first five use GB_TILE_MODE0
 * fields, the last four use GB_MACROTILE_MODE0 fields.  No masking is done,
 * so the caller must pass a value that already fits the field.
 */
58 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
59 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
60 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
61 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
62 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
63 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
64 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
65 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
66 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
67
/*
 * Single-bit masks for the RLC_CGTT_MGCG_OVERRIDE register, one per
 * override (CPF, RLC, MGCG, CGCG, CGLS, GRBM), occupying bits 0 through 5.
 */
68 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
69 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
70 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
71 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
72 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
73 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
74
/*
 * Command codes for BPM SERDES writes: 1 to set, 0 to clear.
 * NOTE(review): "CLE" is presumably short for "clear"; the users of these
 * codes are outside this part of the file.
 */
75 /* BPM SERDES CMD */
76 #define SET_BPM_SERDES_CMD    1
77 #define CLE_BPM_SERDES_CMD    0
78
/*
 * BPM register indices, numbered consecutively from 0.  BPM_REG_FGCG_MAX is
 * the terminating entry and therefore evaluates to 5, the number of
 * registers listed before it.
 */
79 /* BPM Register Address*/
80 enum {
81         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
82         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
83         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
84         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
85         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
86         BPM_REG_FGCG_MAX
87 };
88
/*
 * Firmware images this file declares via MODULE_FIRMWARE(), grouped by ASIC
 * (carrizo, stoney, tonga, topaz, fiji, baffin, ellesmere).  Every ASIC
 * lists ce, pfp, me, mec and rlc images; stoney and topaz have no mec2
 * entry, all the others do.
 */
89 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
90 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
91 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
92 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
93 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
94 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
95
96 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
97 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
99 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
100 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
101
102 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
103 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
105 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
106 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
108
109 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
110 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
111 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
112 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
113 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
114
115 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
116 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
118 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
119 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
120 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
121
122 MODULE_FIRMWARE("amdgpu/baffin_ce.bin");
123 MODULE_FIRMWARE("amdgpu/baffin_pfp.bin");
124 MODULE_FIRMWARE("amdgpu/baffin_me.bin");
125 MODULE_FIRMWARE("amdgpu/baffin_mec.bin");
126 MODULE_FIRMWARE("amdgpu/baffin_mec2.bin");
127 MODULE_FIRMWARE("amdgpu/baffin_rlc.bin");
128
129 MODULE_FIRMWARE("amdgpu/ellesmere_ce.bin");
130 MODULE_FIRMWARE("amdgpu/ellesmere_pfp.bin");
131 MODULE_FIRMWARE("amdgpu/ellesmere_me.bin");
132 MODULE_FIRMWARE("amdgpu/ellesmere_mec.bin");
133 MODULE_FIRMWARE("amdgpu/ellesmere_mec2.bin");
134 MODULE_FIRMWARE("amdgpu/ellesmere_rlc.bin");
135
/*
 * GDS register offsets per VMID.  The table has 16 entries, index 0..15,
 * and entry N holds the BASE, SIZE, GWS and OA register offsets for VMID N,
 * in that order.
 * NOTE(review): the field names of struct amdgpu_gds_reg_offset are not
 * visible here; confirm the initializer order against its declaration.
 */
136 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
137 {
138         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
139         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
140         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
141         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
142         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
143         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
144         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
145         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
146         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
147         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
148         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
149         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
150         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
151         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
152         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
153         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
154 };
155
/*
 * Golden register settings for Tonga (A11).  Flat u32 array laid out three
 * values per row.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * as consumed by amdgpu_program_register_sequence() - confirm there.
 */
156 static const u32 golden_settings_tonga_a11[] =
157 {
158         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
159         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
160         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
161         mmGB_GPU_ID, 0x0000000f, 0x00000000,
162         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
163         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
164         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
165         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
166         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
167         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
168         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
169         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
170         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
171         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
172         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
173 };
174
/*
 * Common golden settings for Tonga: GRBM_GFX_INDEX, the two raster config
 * registers, GB_ADDR_CONFIG and the SPI resource-reserve registers.  Flat
 * u32 array, three values per row; the second value is 0xffffffff in every
 * row.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * as consumed by amdgpu_program_register_sequence() - confirm there.
 */
175 static const u32 tonga_golden_common_all[] =
176 {
177         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
178         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
179         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
180         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
181         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
182         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
183         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
184         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
185 };
186
/*
 * MGCG/CGCG (clock gating) init sequence for Tonga.  Flat u32 array, three
 * values per row.  Row order: RLC_CGTT_MGCG_OVERRIDE, a GRBM_GFX_INDEX row,
 * the CGTT/..._CGTT clock control registers, a second GRBM_GFX_INDEX row,
 * the per-CU CGTS control registers for CU0 through CU7, and finally
 * CGTS_SM_CTRL_REG, CP_RB_WPTR_POLL_CNTL, RLC_CGCG_CGLS_CTRL and
 * CP_MEM_SLP_CNTL.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * applied in array order by amdgpu_program_register_sequence() - confirm
 * there; do not reorder rows without checking.
 */
187 static const u32 tonga_mgcg_cgcg_init[] =
188 {
189         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
190         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
191         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
192         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
193         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
194         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
195         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
196         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
198         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
199         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
200         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
201         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
202         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
203         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
204         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
205         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
206         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
207         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
208         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
209         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
210         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
211         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
212         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
213         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
214         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
215         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
216         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
217         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
218         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
219         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
220         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
221         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
222         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
223         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
224         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
225         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
226         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
227         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
228         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
229         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
230         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
231         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
232         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
233         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
234         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
235         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
236         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
237         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
238         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
239         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
240         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
241         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
242         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
243         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
244         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
245         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
246         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
247         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
248         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
249         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
250         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
251         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
252         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
253         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
254         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
255         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
256         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
257         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
258         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
259         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
260         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
261         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
262         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
263         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
264 };
265
/*
 * Golden register settings for Baffin (A11).  Flat u32 array laid out three
 * values per row.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * as consumed by amdgpu_program_register_sequence() - confirm there.
 */
266 static const u32 golden_settings_baffin_a11[] =
267 {
268         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
269         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
270         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
271         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
272         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
273         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
274         mmSQ_CONFIG, 0x07f80000, 0x07180000,
275         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
276         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
277         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
278         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
279         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
280 };
281
/*
 * Common golden settings for Baffin: GRBM_GFX_INDEX, the two raster config
 * registers, GB_ADDR_CONFIG and the SPI resource-reserve registers.  The
 * GB_ADDR_CONFIG value equals BAFFIN_GB_ADDR_CONFIG_GOLDEN defined near the
 * top of this file.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * as consumed by amdgpu_program_register_sequence() - confirm there.
 */
282 static const u32 baffin_golden_common_all[] =
283 {
284         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
285         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
286         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
287         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
288         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
289         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
290         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
291         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
292 };
293
/*
 * Golden register settings for Ellesmere (A11).  Flat u32 array laid out
 * three values per row.  Unlike the Baffin table, this one lists
 * ATC_MISC_CG and both PA_SC_RASTER_CONFIG registers, and it has a
 * TCP_CHAN_STEER_HI row but no TCP_CHAN_STEER_LO row.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * as consumed by amdgpu_program_register_sequence() - confirm there.
 */
294 static const u32 golden_settings_ellesmere_a11[] =
295 {
296         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
297         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
298         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
299         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
300         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
301         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
302         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
303         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
304         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
305         mmSQ_CONFIG, 0x07f80000, 0x07180000,
306         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
307         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
308         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
309         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
310 };
311
/*
 * Common golden settings for Ellesmere: GRBM_GFX_INDEX, the two raster
 * config registers, GB_ADDR_CONFIG and the SPI resource-reserve registers.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * as consumed by amdgpu_program_register_sequence() - confirm there.
 */
312 static const u32 ellesmere_golden_common_all[] =
313 {
314         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
315         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
316         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
317         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
318         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
319         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
320         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
321         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
322 };
323
/*
 * Common golden settings for Fiji.  This is the third and last table that
 * gfx_v8_0_init_golden_registers() programs for CHIP_FIJI.  Besides the
 * rows shared with the other *_golden_common_all tables it ends with a
 * second GRBM_GFX_INDEX row followed by SPI_CONFIG_CNTL_1.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * as consumed by amdgpu_program_register_sequence() - confirm there.
 */
324 static const u32 fiji_golden_common_all[] =
325 {
326         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
327         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
328         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
329         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
330         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
331         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
332         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
333         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
334         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
335         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
336 };
337
/*
 * Golden register settings for Fiji (A10).  This is the second table that
 * gfx_v8_0_init_golden_registers() programs for CHIP_FIJI, after
 * fiji_mgcg_cgcg_init and before fiji_golden_common_all.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * as consumed by amdgpu_program_register_sequence() - confirm there.
 */
338 static const u32 golden_settings_fiji_a10[] =
339 {
340         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
341         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
342         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
343         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
344         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
345         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
346         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
347         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
348         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
349         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
350         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
351 };
352
/*
 * MGCG/CGCG (clock gating) init sequence for Fiji.  This is the first table
 * that gfx_v8_0_init_golden_registers() programs for CHIP_FIJI.  Row order:
 * RLC_CGTT_MGCG_OVERRIDE, a GRBM_GFX_INDEX row, the CGTT/..._CGTT clock
 * control registers, a second GRBM_GFX_INDEX row, then CGTS_SM_CTRL_REG,
 * CP_RB_WPTR_POLL_CNTL, RLC_CGCG_CGLS_CTRL and CP_MEM_SLP_CNTL.  There are
 * no per-CU CGTS_CUn_* rows in this table.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * applied in array order by amdgpu_program_register_sequence() - confirm
 * there; do not reorder rows without checking.
 */
353 static const u32 fiji_mgcg_cgcg_init[] =
354 {
355         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
356         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
357         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
358         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
359         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
360         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
361         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
362         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
363         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
364         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
365         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
366         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
367         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
368         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
369         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
370         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
371         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
372         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
373         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
374         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
375         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
376         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
377         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
378         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
379         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
380         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
381         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
382         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
383         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
384         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
385         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
386         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
387         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
388         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
389         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
390 };
391
/*
 * Golden register settings for Iceland (A11).  This is the second table
 * that gfx_v8_0_init_golden_registers() programs for CHIP_TOPAZ, after
 * iceland_mgcg_cgcg_init and before iceland_golden_common_all.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * as consumed by amdgpu_program_register_sequence() - confirm there.
 */
392 static const u32 golden_settings_iceland_a11[] =
393 {
394         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
395         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
396         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
397         mmGB_GPU_ID, 0x0000000f, 0x00000000,
398         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
399         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
400         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
401         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
402         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
403         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
404         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
405         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
406         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
407         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
408         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
409 };
410
/*
 * Common golden settings for Iceland.  This is the third and last table
 * that gfx_v8_0_init_golden_registers() programs for CHIP_TOPAZ.  The
 * GB_ADDR_CONFIG value equals TOPAZ_GB_ADDR_CONFIG_GOLDEN defined near the
 * top of this file.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * as consumed by amdgpu_program_register_sequence() - confirm there.
 */
411 static const u32 iceland_golden_common_all[] =
412 {
413         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
414         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
415         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
416         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
417         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
418         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
419         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
420         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
421 };
422
/*
 * MGCG/CGCG (clock gating) init sequence for Iceland.  This is the first
 * table that gfx_v8_0_init_golden_registers() programs for CHIP_TOPAZ.  Row
 * order: RLC_CGTT_MGCG_OVERRIDE, a GRBM_GFX_INDEX row, the CGTT/..._CGTT
 * clock control registers, a second GRBM_GFX_INDEX row, the per-CU CGTS
 * control registers for CU0 through CU5, and finally CGTS_SM_CTRL_REG,
 * CP_RB_WPTR_POLL_CNTL and RLC_CGCG_CGLS_CTRL.  There is no CP_MEM_SLP_CNTL
 * row in this table.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * applied in array order by amdgpu_program_register_sequence() - confirm
 * there; do not reorder rows without checking.
 */
423 static const u32 iceland_mgcg_cgcg_init[] =
424 {
425         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
426         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
427         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
428         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
429         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
430         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
431         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
432         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
433         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
434         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
435         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
436         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
443         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
444         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
445         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
446         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
447         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
448         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
450         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
451         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
452         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
453         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
454         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
455         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
456         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
457         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
458         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
459         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
460         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
461         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
462         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
463         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
464         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
465         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
466         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
467         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
468         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
469         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
470         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
471         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
472         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
473         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
474         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
475         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
476         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
477         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
478         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
479         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
480         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
481         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
482         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
483         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
484         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
485         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
486         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
487         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
488         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
489 };
490
/*
 * Golden register settings for Carrizo ("cz", A11).  Flat u32 array laid
 * out three values per row.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * as consumed by amdgpu_program_register_sequence() - confirm there.
 */
491 static const u32 cz_golden_settings_a11[] =
492 {
493         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
494         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
495         mmGB_GPU_ID, 0x0000000f, 0x00000000,
496         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
497         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
498         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
499         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
500         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
501         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
502         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
503 };
504
/*
 * Common golden settings for Carrizo ("cz"): GRBM_GFX_INDEX, the two raster
 * config registers, GB_ADDR_CONFIG and the SPI resource-reserve registers.
 * The GB_ADDR_CONFIG value equals CARRIZO_GB_ADDR_CONFIG_GOLDEN defined
 * near the top of this file.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * as consumed by amdgpu_program_register_sequence() - confirm there.
 */
505 static const u32 cz_golden_common_all[] =
506 {
507         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
508         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
509         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
510         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
511         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
512         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
513         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
514         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
515 };
516
/*
 * MGCG/CGCG (clock gating) init sequence for Carrizo ("cz").  Flat u32
 * array, three values per row.  Row order: RLC_CGTT_MGCG_OVERRIDE, a
 * GRBM_GFX_INDEX row, the CGTT/..._CGTT clock control registers, a second
 * GRBM_GFX_INDEX row, the per-CU CGTS control registers for CU0 through
 * CU7, and finally CGTS_SM_CTRL_REG, CP_RB_WPTR_POLL_CNTL,
 * RLC_CGCG_CGLS_CTRL and CP_MEM_SLP_CNTL.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * applied in array order by amdgpu_program_register_sequence() - confirm
 * there; do not reorder rows without checking.
 */
517 static const u32 cz_mgcg_cgcg_init[] =
518 {
519         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
520         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
521         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
522         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
523         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
524         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
525         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
526         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
527         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
528         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
529         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
530         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
531         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
532         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
533         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
534         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
535         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
536         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
537         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
538         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
539         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
540         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
541         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
542         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
543         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
544         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
545         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
546         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
547         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
548         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
549         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
550         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
551         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
552         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
553         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
554         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
555         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
558         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
561         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
562         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
563         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
564         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
565         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
566         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
567         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
568         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
569         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
570         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
571         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
572         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
573         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
574         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
575         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
576         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
577         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
578         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
579         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
580         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
581         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
582         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
583         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
584         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
585         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
586         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
587         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
588         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
589         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
590         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
591         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
592         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
593         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
594 };
595
/*
 * Golden register settings for Stoney (A11).  Flat u32 array laid out three
 * values per row.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * as consumed by amdgpu_program_register_sequence() - confirm there.
 */
596 static const u32 stoney_golden_settings_a11[] =
597 {
598         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
599         mmGB_GPU_ID, 0x0000000f, 0x00000000,
600         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
601         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
602         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
603         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
604         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
605         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
606         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
607         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
608 };
609
/*
 * Common golden settings for Stoney: GRBM_GFX_INDEX, the two raster config
 * registers (both with value 0), GB_ADDR_CONFIG and the SPI
 * resource-reserve registers.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * as consumed by amdgpu_program_register_sequence() - confirm there.
 */
610 static const u32 stoney_golden_common_all[] =
611 {
612         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
613         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
614         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
615         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
616         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
617         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
618         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
619         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
620 };
621
/*
 * MGCG/CGCG (clock gating) init sequence for Stoney.  It has only six rows:
 * GRBM_GFX_INDEX, RLC_CGCG_CGLS_CTRL, CP_MEM_SLP_CNTL, RLC_MEM_SLP_CNTL,
 * CGTS_SM_CTRL_REG and ATC_MISC_CG.  It contains no RLC_CGTT_MGCG_OVERRIDE
 * row, no CGTT clock control rows and no per-CU CGTS rows.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * applied in array order by amdgpu_program_register_sequence() - confirm
 * there.
 */
622 static const u32 stoney_mgcg_cgcg_init[] =
623 {
624         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
625         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
626         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
627         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
628         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
629         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
630 };
631
/* Prototypes for static setup helpers whose definitions live elsewhere in this file. */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
636
637 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
638 {
639         switch (adev->asic_type) {
640         case CHIP_TOPAZ:
641                 amdgpu_program_register_sequence(adev,
642                                                  iceland_mgcg_cgcg_init,
643                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
644                 amdgpu_program_register_sequence(adev,
645                                                  golden_settings_iceland_a11,
646                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
647                 amdgpu_program_register_sequence(adev,
648                                                  iceland_golden_common_all,
649                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
650                 break;
651         case CHIP_FIJI:
652                 amdgpu_program_register_sequence(adev,
653                                                  fiji_mgcg_cgcg_init,
654                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
655                 amdgpu_program_register_sequence(adev,
656                                                  golden_settings_fiji_a10,
657                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
658                 amdgpu_program_register_sequence(adev,
659                                                  fiji_golden_common_all,
660                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
661                 break;
662
663         case CHIP_TONGA:
664                 amdgpu_program_register_sequence(adev,
665                                                  tonga_mgcg_cgcg_init,
666                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
667                 amdgpu_program_register_sequence(adev,
668                                                  golden_settings_tonga_a11,
669                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
670                 amdgpu_program_register_sequence(adev,
671                                                  tonga_golden_common_all,
672                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
673                 break;
674         case CHIP_BAFFIN:
675                 amdgpu_program_register_sequence(adev,
676                                                  golden_settings_baffin_a11,
677                                                  (const u32)ARRAY_SIZE(golden_settings_baffin_a11));
678                 amdgpu_program_register_sequence(adev,
679                                                  baffin_golden_common_all,
680                                                  (const u32)ARRAY_SIZE(baffin_golden_common_all));
681                 break;
682         case CHIP_ELLESMERE:
683                 amdgpu_program_register_sequence(adev,
684                                                  golden_settings_ellesmere_a11,
685                                                  (const u32)ARRAY_SIZE(golden_settings_ellesmere_a11));
686                 amdgpu_program_register_sequence(adev,
687                                                  ellesmere_golden_common_all,
688                                                  (const u32)ARRAY_SIZE(ellesmere_golden_common_all));
689                 break;
690         case CHIP_CARRIZO:
691                 amdgpu_program_register_sequence(adev,
692                                                  cz_mgcg_cgcg_init,
693                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
694                 amdgpu_program_register_sequence(adev,
695                                                  cz_golden_settings_a11,
696                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
697                 amdgpu_program_register_sequence(adev,
698                                                  cz_golden_common_all,
699                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
700                 break;
701         case CHIP_STONEY:
702                 amdgpu_program_register_sequence(adev,
703                                                  stoney_mgcg_cgcg_init,
704                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
705                 amdgpu_program_register_sequence(adev,
706                                                  stoney_golden_settings_a11,
707                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
708                 amdgpu_program_register_sequence(adev,
709                                                  stoney_golden_common_all,
710                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
711                 break;
712         default:
713                 break;
714         }
715 }
716
717 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
718 {
719         int i;
720
721         adev->gfx.scratch.num_reg = 7;
722         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
723         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
724                 adev->gfx.scratch.free[i] = true;
725                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
726         }
727 }
728
729 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
730 {
731         struct amdgpu_device *adev = ring->adev;
732         uint32_t scratch;
733         uint32_t tmp = 0;
734         unsigned i;
735         int r;
736
737         r = amdgpu_gfx_scratch_get(adev, &scratch);
738         if (r) {
739                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
740                 return r;
741         }
742         WREG32(scratch, 0xCAFEDEAD);
743         r = amdgpu_ring_alloc(ring, 3);
744         if (r) {
745                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
746                           ring->idx, r);
747                 amdgpu_gfx_scratch_free(adev, scratch);
748                 return r;
749         }
750         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
751         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
752         amdgpu_ring_write(ring, 0xDEADBEEF);
753         amdgpu_ring_commit(ring);
754
755         for (i = 0; i < adev->usec_timeout; i++) {
756                 tmp = RREG32(scratch);
757                 if (tmp == 0xDEADBEEF)
758                         break;
759                 DRM_UDELAY(1);
760         }
761         if (i < adev->usec_timeout) {
762                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
763                          ring->idx, i);
764         } else {
765                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
766                           ring->idx, scratch, tmp);
767                 r = -EINVAL;
768         }
769         amdgpu_gfx_scratch_free(adev, scratch);
770         return r;
771 }
772
773 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
774 {
775         struct amdgpu_device *adev = ring->adev;
776         struct amdgpu_ib ib;
777         struct fence *f = NULL;
778         uint32_t scratch;
779         uint32_t tmp = 0;
780         unsigned i;
781         int r;
782
783         r = amdgpu_gfx_scratch_get(adev, &scratch);
784         if (r) {
785                 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
786                 return r;
787         }
788         WREG32(scratch, 0xCAFEDEAD);
789         memset(&ib, 0, sizeof(ib));
790         r = amdgpu_ib_get(adev, NULL, 256, &ib);
791         if (r) {
792                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
793                 goto err1;
794         }
795         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
796         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
797         ib.ptr[2] = 0xDEADBEEF;
798         ib.length_dw = 3;
799
800         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
801         if (r)
802                 goto err2;
803
804         r = fence_wait(f, false);
805         if (r) {
806                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
807                 goto err2;
808         }
809         for (i = 0; i < adev->usec_timeout; i++) {
810                 tmp = RREG32(scratch);
811                 if (tmp == 0xDEADBEEF)
812                         break;
813                 DRM_UDELAY(1);
814         }
815         if (i < adev->usec_timeout) {
816                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
817                          ring->idx, i);
818                 goto err2;
819         } else {
820                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
821                           scratch, tmp);
822                 r = -EINVAL;
823         }
824 err2:
825         fence_put(f);
826         amdgpu_ib_free(adev, &ib, NULL);
827         fence_put(f);
828 err1:
829         amdgpu_gfx_scratch_free(adev, scratch);
830         return r;
831 }
832
833 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
834 {
835         const char *chip_name;
836         char fw_name[30];
837         int err;
838         struct amdgpu_firmware_info *info = NULL;
839         const struct common_firmware_header *header = NULL;
840         const struct gfx_firmware_header_v1_0 *cp_hdr;
841
842         DRM_DEBUG("\n");
843
844         switch (adev->asic_type) {
845         case CHIP_TOPAZ:
846                 chip_name = "topaz";
847                 break;
848         case CHIP_TONGA:
849                 chip_name = "tonga";
850                 break;
851         case CHIP_CARRIZO:
852                 chip_name = "carrizo";
853                 break;
854         case CHIP_FIJI:
855                 chip_name = "fiji";
856                 break;
857         case CHIP_BAFFIN:
858                 chip_name = "baffin";
859                 break;
860         case CHIP_ELLESMERE:
861                 chip_name = "ellesmere";
862                 break;
863         case CHIP_STONEY:
864                 chip_name = "stoney";
865                 break;
866         default:
867                 BUG();
868         }
869
870         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
871         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
872         if (err)
873                 goto out;
874         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
875         if (err)
876                 goto out;
877         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
878         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
879         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
880
881         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
882         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
883         if (err)
884                 goto out;
885         err = amdgpu_ucode_validate(adev->gfx.me_fw);
886         if (err)
887                 goto out;
888         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
889         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
890         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
891
892         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
893         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
894         if (err)
895                 goto out;
896         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
897         if (err)
898                 goto out;
899         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
900         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
901         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
902
903         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
904         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
905         if (err)
906                 goto out;
907         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
908         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
909         adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
910         adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
911
912         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
913         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
914         if (err)
915                 goto out;
916         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
917         if (err)
918                 goto out;
919         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
920         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
921         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
922
923         if ((adev->asic_type != CHIP_STONEY) &&
924             (adev->asic_type != CHIP_TOPAZ)) {
925                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
926                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
927                 if (!err) {
928                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
929                         if (err)
930                                 goto out;
931                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
932                                 adev->gfx.mec2_fw->data;
933                         adev->gfx.mec2_fw_version =
934                                 le32_to_cpu(cp_hdr->header.ucode_version);
935                         adev->gfx.mec2_feature_version =
936                                 le32_to_cpu(cp_hdr->ucode_feature_version);
937                 } else {
938                         err = 0;
939                         adev->gfx.mec2_fw = NULL;
940                 }
941         }
942
943         if (adev->firmware.smu_load) {
944                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
945                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
946                 info->fw = adev->gfx.pfp_fw;
947                 header = (const struct common_firmware_header *)info->fw->data;
948                 adev->firmware.fw_size +=
949                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
950
951                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
952                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
953                 info->fw = adev->gfx.me_fw;
954                 header = (const struct common_firmware_header *)info->fw->data;
955                 adev->firmware.fw_size +=
956                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
957
958                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
959                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
960                 info->fw = adev->gfx.ce_fw;
961                 header = (const struct common_firmware_header *)info->fw->data;
962                 adev->firmware.fw_size +=
963                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
964
965                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
966                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
967                 info->fw = adev->gfx.rlc_fw;
968                 header = (const struct common_firmware_header *)info->fw->data;
969                 adev->firmware.fw_size +=
970                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
971
972                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
973                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
974                 info->fw = adev->gfx.mec_fw;
975                 header = (const struct common_firmware_header *)info->fw->data;
976                 adev->firmware.fw_size +=
977                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
978
979                 if (adev->gfx.mec2_fw) {
980                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
981                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
982                         info->fw = adev->gfx.mec2_fw;
983                         header = (const struct common_firmware_header *)info->fw->data;
984                         adev->firmware.fw_size +=
985                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
986                 }
987
988         }
989
990 out:
991         if (err) {
992                 dev_err(adev->dev,
993                         "gfx8: Failed to load firmware \"%s\"\n",
994                         fw_name);
995                 release_firmware(adev->gfx.pfp_fw);
996                 adev->gfx.pfp_fw = NULL;
997                 release_firmware(adev->gfx.me_fw);
998                 adev->gfx.me_fw = NULL;
999                 release_firmware(adev->gfx.ce_fw);
1000                 adev->gfx.ce_fw = NULL;
1001                 release_firmware(adev->gfx.rlc_fw);
1002                 adev->gfx.rlc_fw = NULL;
1003                 release_firmware(adev->gfx.mec_fw);
1004                 adev->gfx.mec_fw = NULL;
1005                 release_firmware(adev->gfx.mec2_fw);
1006                 adev->gfx.mec2_fw = NULL;
1007         }
1008         return err;
1009 }
1010
1011 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1012 {
1013         int r;
1014
1015         if (adev->gfx.mec.hpd_eop_obj) {
1016                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1017                 if (unlikely(r != 0))
1018                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1019                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1020                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1021
1022                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1023                 adev->gfx.mec.hpd_eop_obj = NULL;
1024         }
1025 }
1026
1027 #define MEC_HPD_SIZE 2048
1028
1029 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1030 {
1031         int r;
1032         u32 *hpd;
1033
1034         /*
1035          * we assign only 1 pipe because all other pipes will
1036          * be handled by KFD
1037          */
1038         adev->gfx.mec.num_mec = 1;
1039         adev->gfx.mec.num_pipe = 1;
1040         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1041
1042         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1043                 r = amdgpu_bo_create(adev,
1044                                      adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1045                                      PAGE_SIZE, true,
1046                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1047                                      &adev->gfx.mec.hpd_eop_obj);
1048                 if (r) {
1049                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1050                         return r;
1051                 }
1052         }
1053
1054         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1055         if (unlikely(r != 0)) {
1056                 gfx_v8_0_mec_fini(adev);
1057                 return r;
1058         }
1059         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1060                           &adev->gfx.mec.hpd_eop_gpu_addr);
1061         if (r) {
1062                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1063                 gfx_v8_0_mec_fini(adev);
1064                 return r;
1065         }
1066         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1067         if (r) {
1068                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1069                 gfx_v8_0_mec_fini(adev);
1070                 return r;
1071         }
1072
1073         memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1074
1075         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1076         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1077
1078         return 0;
1079 }
1080
1081 static const u32 vgpr_init_compute_shader[] =
1082 {
1083         0x7e000209, 0x7e020208,
1084         0x7e040207, 0x7e060206,
1085         0x7e080205, 0x7e0a0204,
1086         0x7e0c0203, 0x7e0e0202,
1087         0x7e100201, 0x7e120200,
1088         0x7e140209, 0x7e160208,
1089         0x7e180207, 0x7e1a0206,
1090         0x7e1c0205, 0x7e1e0204,
1091         0x7e200203, 0x7e220202,
1092         0x7e240201, 0x7e260200,
1093         0x7e280209, 0x7e2a0208,
1094         0x7e2c0207, 0x7e2e0206,
1095         0x7e300205, 0x7e320204,
1096         0x7e340203, 0x7e360202,
1097         0x7e380201, 0x7e3a0200,
1098         0x7e3c0209, 0x7e3e0208,
1099         0x7e400207, 0x7e420206,
1100         0x7e440205, 0x7e460204,
1101         0x7e480203, 0x7e4a0202,
1102         0x7e4c0201, 0x7e4e0200,
1103         0x7e500209, 0x7e520208,
1104         0x7e540207, 0x7e560206,
1105         0x7e580205, 0x7e5a0204,
1106         0x7e5c0203, 0x7e5e0202,
1107         0x7e600201, 0x7e620200,
1108         0x7e640209, 0x7e660208,
1109         0x7e680207, 0x7e6a0206,
1110         0x7e6c0205, 0x7e6e0204,
1111         0x7e700203, 0x7e720202,
1112         0x7e740201, 0x7e760200,
1113         0x7e780209, 0x7e7a0208,
1114         0x7e7c0207, 0x7e7e0206,
1115         0xbf8a0000, 0xbf810000,
1116 };
1117
1118 static const u32 sgpr_init_compute_shader[] =
1119 {
1120         0xbe8a0100, 0xbe8c0102,
1121         0xbe8e0104, 0xbe900106,
1122         0xbe920108, 0xbe940100,
1123         0xbe960102, 0xbe980104,
1124         0xbe9a0106, 0xbe9c0108,
1125         0xbe9e0100, 0xbea00102,
1126         0xbea20104, 0xbea40106,
1127         0xbea60108, 0xbea80100,
1128         0xbeaa0102, 0xbeac0104,
1129         0xbeae0106, 0xbeb00108,
1130         0xbeb20100, 0xbeb40102,
1131         0xbeb60104, 0xbeb80106,
1132         0xbeba0108, 0xbebc0100,
1133         0xbebe0102, 0xbec00104,
1134         0xbec20106, 0xbec40108,
1135         0xbec60100, 0xbec80102,
1136         0xbee60004, 0xbee70005,
1137         0xbeea0006, 0xbeeb0007,
1138         0xbee80008, 0xbee90009,
1139         0xbefc0000, 0xbf8a0000,
1140         0xbf810000, 0x00000000,
1141 };
1142
1143 static const u32 vgpr_init_regs[] =
1144 {
1145         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1146         mmCOMPUTE_RESOURCE_LIMITS, 0,
1147         mmCOMPUTE_NUM_THREAD_X, 256*4,
1148         mmCOMPUTE_NUM_THREAD_Y, 1,
1149         mmCOMPUTE_NUM_THREAD_Z, 1,
1150         mmCOMPUTE_PGM_RSRC2, 20,
1151         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1152         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1153         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1154         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1155         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1156         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1157         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1158         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1159         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1160         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1161 };
1162
1163 static const u32 sgpr1_init_regs[] =
1164 {
1165         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1166         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1167         mmCOMPUTE_NUM_THREAD_X, 256*5,
1168         mmCOMPUTE_NUM_THREAD_Y, 1,
1169         mmCOMPUTE_NUM_THREAD_Z, 1,
1170         mmCOMPUTE_PGM_RSRC2, 20,
1171         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1172         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1173         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1174         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1175         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1176         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1177         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1178         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1179         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1180         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1181 };
1182
1183 static const u32 sgpr2_init_regs[] =
1184 {
1185         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1186         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1187         mmCOMPUTE_NUM_THREAD_X, 256*5,
1188         mmCOMPUTE_NUM_THREAD_Y, 1,
1189         mmCOMPUTE_NUM_THREAD_Z, 1,
1190         mmCOMPUTE_PGM_RSRC2, 20,
1191         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1192         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1193         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1194         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1195         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1196         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1197         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1198         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1199         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1200         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1201 };
1202
1203 static const u32 sec_ded_counter_registers[] =
1204 {
1205         mmCPC_EDC_ATC_CNT,
1206         mmCPC_EDC_SCRATCH_CNT,
1207         mmCPC_EDC_UCODE_CNT,
1208         mmCPF_EDC_ATC_CNT,
1209         mmCPF_EDC_ROQ_CNT,
1210         mmCPF_EDC_TAG_CNT,
1211         mmCPG_EDC_ATC_CNT,
1212         mmCPG_EDC_DMA_CNT,
1213         mmCPG_EDC_TAG_CNT,
1214         mmDC_EDC_CSINVOC_CNT,
1215         mmDC_EDC_RESTORE_CNT,
1216         mmDC_EDC_STATE_CNT,
1217         mmGDS_EDC_CNT,
1218         mmGDS_EDC_GRBM_CNT,
1219         mmGDS_EDC_OA_DED,
1220         mmSPI_EDC_CNT,
1221         mmSQC_ATC_EDC_GATCL1_CNT,
1222         mmSQC_EDC_CNT,
1223         mmSQ_EDC_DED_CNT,
1224         mmSQ_EDC_INFO,
1225         mmSQ_EDC_SEC_CNT,
1226         mmTCC_EDC_CNT,
1227         mmTCP_ATC_EDC_GATCL1_CNT,
1228         mmTCP_EDC_CNT,
1229         mmTD_EDC_CNT
1230 };
1231
1232 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1233 {
1234         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1235         struct amdgpu_ib ib;
1236         struct fence *f = NULL;
1237         int r, i;
1238         u32 tmp;
1239         unsigned total_size, vgpr_offset, sgpr_offset;
1240         u64 gpu_addr;
1241
1242         /* only supported on CZ */
1243         if (adev->asic_type != CHIP_CARRIZO)
1244                 return 0;
1245
1246         /* bail if the compute ring is not ready */
1247         if (!ring->ready)
1248                 return 0;
1249
1250         tmp = RREG32(mmGB_EDC_MODE);
1251         WREG32(mmGB_EDC_MODE, 0);
1252
1253         total_size =
1254                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1255         total_size +=
1256                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1257         total_size +=
1258                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1259         total_size = ALIGN(total_size, 256);
1260         vgpr_offset = total_size;
1261         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1262         sgpr_offset = total_size;
1263         total_size += sizeof(sgpr_init_compute_shader);
1264
1265         /* allocate an indirect buffer to put the commands in */
1266         memset(&ib, 0, sizeof(ib));
1267         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1268         if (r) {
1269                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1270                 return r;
1271         }
1272
1273         /* load the compute shaders */
1274         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1275                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1276
1277         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1278                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1279
1280         /* init the ib length to 0 */
1281         ib.length_dw = 0;
1282
1283         /* VGPR */
1284         /* write the register state for the compute dispatch */
1285         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1286                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1287                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1288                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1289         }
1290         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1291         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1292         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1293         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1294         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1295         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1296
1297         /* write dispatch packet */
1298         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1299         ib.ptr[ib.length_dw++] = 8; /* x */
1300         ib.ptr[ib.length_dw++] = 1; /* y */
1301         ib.ptr[ib.length_dw++] = 1; /* z */
1302         ib.ptr[ib.length_dw++] =
1303                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1304
1305         /* write CS partial flush packet */
1306         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1307         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1308
1309         /* SGPR1 */
1310         /* write the register state for the compute dispatch */
1311         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1312                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1313                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1314                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1315         }
1316         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1317         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1318         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1319         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1320         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1321         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1322
1323         /* write dispatch packet */
1324         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1325         ib.ptr[ib.length_dw++] = 8; /* x */
1326         ib.ptr[ib.length_dw++] = 1; /* y */
1327         ib.ptr[ib.length_dw++] = 1; /* z */
1328         ib.ptr[ib.length_dw++] =
1329                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1330
1331         /* write CS partial flush packet */
1332         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1333         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1334
1335         /* SGPR2 */
1336         /* write the register state for the compute dispatch */
1337         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1338                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1339                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1340                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1341         }
1342         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1343         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1344         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1345         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1346         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1347         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1348
1349         /* write dispatch packet */
1350         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1351         ib.ptr[ib.length_dw++] = 8; /* x */
1352         ib.ptr[ib.length_dw++] = 1; /* y */
1353         ib.ptr[ib.length_dw++] = 1; /* z */
1354         ib.ptr[ib.length_dw++] =
1355                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1356
1357         /* write CS partial flush packet */
1358         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1359         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1360
1361         /* shedule the ib on the ring */
1362         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1363         if (r) {
1364                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1365                 goto fail;
1366         }
1367
1368         /* wait for the GPU to finish processing the IB */
1369         r = fence_wait(f, false);
1370         if (r) {
1371                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1372                 goto fail;
1373         }
1374
1375         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1376         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1377         WREG32(mmGB_EDC_MODE, tmp);
1378
1379         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1380         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1381         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1382
1383
1384         /* read back registers to clear the counters */
1385         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1386                 RREG32(sec_ded_counter_registers[i]);
1387
1388 fail:
1389         fence_put(f);
1390         amdgpu_ib_free(adev, &ib, NULL);
1391         fence_put(f);
1392
1393         return r;
1394 }
1395
1396 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1397 {
1398         u32 gb_addr_config;
1399         u32 mc_shared_chmap, mc_arb_ramcfg;
1400         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1401         u32 tmp;
1402         int ret;
1403
1404         switch (adev->asic_type) {
1405         case CHIP_TOPAZ:
1406                 adev->gfx.config.max_shader_engines = 1;
1407                 adev->gfx.config.max_tile_pipes = 2;
1408                 adev->gfx.config.max_cu_per_sh = 6;
1409                 adev->gfx.config.max_sh_per_se = 1;
1410                 adev->gfx.config.max_backends_per_se = 2;
1411                 adev->gfx.config.max_texture_channel_caches = 2;
1412                 adev->gfx.config.max_gprs = 256;
1413                 adev->gfx.config.max_gs_threads = 32;
1414                 adev->gfx.config.max_hw_contexts = 8;
1415
1416                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1417                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1418                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1419                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1420                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1421                 break;
1422         case CHIP_FIJI:
1423                 adev->gfx.config.max_shader_engines = 4;
1424                 adev->gfx.config.max_tile_pipes = 16;
1425                 adev->gfx.config.max_cu_per_sh = 16;
1426                 adev->gfx.config.max_sh_per_se = 1;
1427                 adev->gfx.config.max_backends_per_se = 4;
1428                 adev->gfx.config.max_texture_channel_caches = 16;
1429                 adev->gfx.config.max_gprs = 256;
1430                 adev->gfx.config.max_gs_threads = 32;
1431                 adev->gfx.config.max_hw_contexts = 8;
1432
1433                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1434                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1435                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1436                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1437                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1438                 break;
1439         case CHIP_BAFFIN:
1440                 ret = amdgpu_atombios_get_gfx_info(adev);
1441                 if (ret)
1442                         return ret;
1443                 adev->gfx.config.max_gprs = 256;
1444                 adev->gfx.config.max_gs_threads = 32;
1445                 adev->gfx.config.max_hw_contexts = 8;
1446
1447                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1448                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1449                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1450                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1451                 gb_addr_config = BAFFIN_GB_ADDR_CONFIG_GOLDEN;
1452                 break;
1453         case CHIP_ELLESMERE:
1454                 ret = amdgpu_atombios_get_gfx_info(adev);
1455                 if (ret)
1456                         return ret;
1457                 adev->gfx.config.max_gprs = 256;
1458                 adev->gfx.config.max_gs_threads = 32;
1459                 adev->gfx.config.max_hw_contexts = 8;
1460
1461                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1462                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1463                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1464                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1465                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1466                 break;
1467         case CHIP_TONGA:
1468                 adev->gfx.config.max_shader_engines = 4;
1469                 adev->gfx.config.max_tile_pipes = 8;
1470                 adev->gfx.config.max_cu_per_sh = 8;
1471                 adev->gfx.config.max_sh_per_se = 1;
1472                 adev->gfx.config.max_backends_per_se = 2;
1473                 adev->gfx.config.max_texture_channel_caches = 8;
1474                 adev->gfx.config.max_gprs = 256;
1475                 adev->gfx.config.max_gs_threads = 32;
1476                 adev->gfx.config.max_hw_contexts = 8;
1477
1478                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1479                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1480                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1481                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1482                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1483                 break;
1484         case CHIP_CARRIZO:
1485                 adev->gfx.config.max_shader_engines = 1;
1486                 adev->gfx.config.max_tile_pipes = 2;
1487                 adev->gfx.config.max_sh_per_se = 1;
1488                 adev->gfx.config.max_backends_per_se = 2;
1489
1490                 switch (adev->pdev->revision) {
1491                 case 0xc4:
1492                 case 0x84:
1493                 case 0xc8:
1494                 case 0xcc:
1495                 case 0xe1:
1496                 case 0xe3:
1497                         /* B10 */
1498                         adev->gfx.config.max_cu_per_sh = 8;
1499                         break;
1500                 case 0xc5:
1501                 case 0x81:
1502                 case 0x85:
1503                 case 0xc9:
1504                 case 0xcd:
1505                 case 0xe2:
1506                 case 0xe4:
1507                         /* B8 */
1508                         adev->gfx.config.max_cu_per_sh = 6;
1509                         break;
1510                 case 0xc6:
1511                 case 0xca:
1512                 case 0xce:
1513                 case 0x88:
1514                         /* B6 */
1515                         adev->gfx.config.max_cu_per_sh = 6;
1516                         break;
1517                 case 0xc7:
1518                 case 0x87:
1519                 case 0xcb:
1520                 case 0xe5:
1521                 case 0x89:
1522                 default:
1523                         /* B4 */
1524                         adev->gfx.config.max_cu_per_sh = 4;
1525                         break;
1526                 }
1527
1528                 adev->gfx.config.max_texture_channel_caches = 2;
1529                 adev->gfx.config.max_gprs = 256;
1530                 adev->gfx.config.max_gs_threads = 32;
1531                 adev->gfx.config.max_hw_contexts = 8;
1532
1533                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1534                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1535                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1536                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1537                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1538                 break;
1539         case CHIP_STONEY:
1540                 adev->gfx.config.max_shader_engines = 1;
1541                 adev->gfx.config.max_tile_pipes = 2;
1542                 adev->gfx.config.max_sh_per_se = 1;
1543                 adev->gfx.config.max_backends_per_se = 1;
1544
1545                 switch (adev->pdev->revision) {
1546                 case 0xc0:
1547                 case 0xc1:
1548                 case 0xc2:
1549                 case 0xc4:
1550                 case 0xc8:
1551                 case 0xc9:
1552                         adev->gfx.config.max_cu_per_sh = 3;
1553                         break;
1554                 case 0xd0:
1555                 case 0xd1:
1556                 case 0xd2:
1557                 default:
1558                         adev->gfx.config.max_cu_per_sh = 2;
1559                         break;
1560                 }
1561
1562                 adev->gfx.config.max_texture_channel_caches = 2;
1563                 adev->gfx.config.max_gprs = 256;
1564                 adev->gfx.config.max_gs_threads = 16;
1565                 adev->gfx.config.max_hw_contexts = 8;
1566
1567                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1568                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1569                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1570                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1571                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1572                 break;
1573         default:
1574                 adev->gfx.config.max_shader_engines = 2;
1575                 adev->gfx.config.max_tile_pipes = 4;
1576                 adev->gfx.config.max_cu_per_sh = 2;
1577                 adev->gfx.config.max_sh_per_se = 1;
1578                 adev->gfx.config.max_backends_per_se = 2;
1579                 adev->gfx.config.max_texture_channel_caches = 4;
1580                 adev->gfx.config.max_gprs = 256;
1581                 adev->gfx.config.max_gs_threads = 32;
1582                 adev->gfx.config.max_hw_contexts = 8;
1583
1584                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1585                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1586                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1587                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1588                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1589                 break;
1590         }
1591
1592         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1593         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1594         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1595
1596         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1597         adev->gfx.config.mem_max_burst_length_bytes = 256;
1598         if (adev->flags & AMD_IS_APU) {
1599                 /* Get memory bank mapping mode. */
1600                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1601                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1602                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1603
1604                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1605                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1606                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1607
1608                 /* Validate settings in case only one DIMM installed. */
1609                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1610                         dimm00_addr_map = 0;
1611                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1612                         dimm01_addr_map = 0;
1613                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1614                         dimm10_addr_map = 0;
1615                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1616                         dimm11_addr_map = 0;
1617
1618                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1619                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1620                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1621                         adev->gfx.config.mem_row_size_in_kb = 2;
1622                 else
1623                         adev->gfx.config.mem_row_size_in_kb = 1;
1624         } else {
1625                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1626                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1627                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1628                         adev->gfx.config.mem_row_size_in_kb = 4;
1629         }
1630
1631         adev->gfx.config.shader_engine_tile_size = 32;
1632         adev->gfx.config.num_gpus = 1;
1633         adev->gfx.config.multi_gpu_tile_size = 64;
1634
1635         /* fix up row size */
1636         switch (adev->gfx.config.mem_row_size_in_kb) {
1637         case 1:
1638         default:
1639                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1640                 break;
1641         case 2:
1642                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1643                 break;
1644         case 4:
1645                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1646                 break;
1647         }
1648         adev->gfx.config.gb_addr_config = gb_addr_config;
1649
1650         return 0;
1651 }
1652
1653 static int gfx_v8_0_sw_init(void *handle)
1654 {
1655         int i, r;
1656         struct amdgpu_ring *ring;
1657         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1658
1659         /* EOP Event */
1660         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1661         if (r)
1662                 return r;
1663
1664         /* Privileged reg */
1665         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1666         if (r)
1667                 return r;
1668
1669         /* Privileged inst */
1670         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1671         if (r)
1672                 return r;
1673
1674         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1675
1676         gfx_v8_0_scratch_init(adev);
1677
1678         r = gfx_v8_0_init_microcode(adev);
1679         if (r) {
1680                 DRM_ERROR("Failed to load gfx firmware!\n");
1681                 return r;
1682         }
1683
1684         r = gfx_v8_0_mec_init(adev);
1685         if (r) {
1686                 DRM_ERROR("Failed to init MEC BOs!\n");
1687                 return r;
1688         }
1689
1690         /* set up the gfx ring */
1691         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1692                 ring = &adev->gfx.gfx_ring[i];
1693                 ring->ring_obj = NULL;
1694                 sprintf(ring->name, "gfx");
1695                 /* no gfx doorbells on iceland */
1696                 if (adev->asic_type != CHIP_TOPAZ) {
1697                         ring->use_doorbell = true;
1698                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1699                 }
1700
1701                 r = amdgpu_ring_init(adev, ring, 1024,
1702                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1703                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1704                                      AMDGPU_RING_TYPE_GFX);
1705                 if (r)
1706                         return r;
1707         }
1708
1709         /* set up the compute queues */
1710         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1711                 unsigned irq_type;
1712
1713                 /* max 32 queues per MEC */
1714                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1715                         DRM_ERROR("Too many (%d) compute rings!\n", i);
1716                         break;
1717                 }
1718                 ring = &adev->gfx.compute_ring[i];
1719                 ring->ring_obj = NULL;
1720                 ring->use_doorbell = true;
1721                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1722                 ring->me = 1; /* first MEC */
1723                 ring->pipe = i / 8;
1724                 ring->queue = i % 8;
1725                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1726                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1727                 /* type-2 packets are deprecated on MEC, use type-3 instead */
1728                 r = amdgpu_ring_init(adev, ring, 1024,
1729                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1730                                      &adev->gfx.eop_irq, irq_type,
1731                                      AMDGPU_RING_TYPE_COMPUTE);
1732                 if (r)
1733                         return r;
1734         }
1735
1736         /* reserve GDS, GWS and OA resource for gfx */
1737         r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1738                         PAGE_SIZE, true,
1739                         AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1740                         NULL, &adev->gds.gds_gfx_bo);
1741         if (r)
1742                 return r;
1743
1744         r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1745                 PAGE_SIZE, true,
1746                 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1747                 NULL, &adev->gds.gws_gfx_bo);
1748         if (r)
1749                 return r;
1750
1751         r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1752                         PAGE_SIZE, true,
1753                         AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1754                         NULL, &adev->gds.oa_gfx_bo);
1755         if (r)
1756                 return r;
1757
1758         adev->gfx.ce_ram_size = 0x8000;
1759
1760         r = gfx_v8_0_gpu_early_init(adev);
1761         if (r)
1762                 return r;
1763
1764         return 0;
1765 }
1766
1767 static int gfx_v8_0_sw_fini(void *handle)
1768 {
1769         int i;
1770         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1771
1772         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1773         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1774         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1775
1776         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1777                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1778         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1779                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1780
1781         gfx_v8_0_mec_fini(adev);
1782
1783         return 0;
1784 }
1785
1786 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1787 {
1788         uint32_t *modearray, *mod2array;
1789         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1790         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1791         u32 reg_offset;
1792
1793         modearray = adev->gfx.config.tile_mode_array;
1794         mod2array = adev->gfx.config.macrotile_mode_array;
1795
1796         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1797                 modearray[reg_offset] = 0;
1798
1799         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
1800                 mod2array[reg_offset] = 0;
1801
1802         switch (adev->asic_type) {
1803         case CHIP_TOPAZ:
1804                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1805                                 PIPE_CONFIG(ADDR_SURF_P2) |
1806                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1807                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1808                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1809                                 PIPE_CONFIG(ADDR_SURF_P2) |
1810                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1811                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1812                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1813                                 PIPE_CONFIG(ADDR_SURF_P2) |
1814                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1815                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1816                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1817                                 PIPE_CONFIG(ADDR_SURF_P2) |
1818                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1819                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1820                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1821                                 PIPE_CONFIG(ADDR_SURF_P2) |
1822                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1823                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1824                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1825                                 PIPE_CONFIG(ADDR_SURF_P2) |
1826                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1827                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1828                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1829                                 PIPE_CONFIG(ADDR_SURF_P2) |
1830                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1831                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1832                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1833                                 PIPE_CONFIG(ADDR_SURF_P2));
1834                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1835                                 PIPE_CONFIG(ADDR_SURF_P2) |
1836                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1837                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1838                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1839                                  PIPE_CONFIG(ADDR_SURF_P2) |
1840                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1841                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1842                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1843                                  PIPE_CONFIG(ADDR_SURF_P2) |
1844                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1845                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1846                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1847                                  PIPE_CONFIG(ADDR_SURF_P2) |
1848                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1849                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1850                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1851                                  PIPE_CONFIG(ADDR_SURF_P2) |
1852                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1853                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1854                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1855                                  PIPE_CONFIG(ADDR_SURF_P2) |
1856                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1857                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1858                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1859                                  PIPE_CONFIG(ADDR_SURF_P2) |
1860                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1861                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1862                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1863                                  PIPE_CONFIG(ADDR_SURF_P2) |
1864                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1865                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1866                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1867                                  PIPE_CONFIG(ADDR_SURF_P2) |
1868                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1869                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1870                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1871                                  PIPE_CONFIG(ADDR_SURF_P2) |
1872                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1873                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1874                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1875                                  PIPE_CONFIG(ADDR_SURF_P2) |
1876                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1877                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1878                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1879                                  PIPE_CONFIG(ADDR_SURF_P2) |
1880                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1881                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1882                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1883                                  PIPE_CONFIG(ADDR_SURF_P2) |
1884                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1885                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1886                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1887                                  PIPE_CONFIG(ADDR_SURF_P2) |
1888                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1889                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1890                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1891                                  PIPE_CONFIG(ADDR_SURF_P2) |
1892                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1893                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1894                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1895                                  PIPE_CONFIG(ADDR_SURF_P2) |
1896                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1897                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1898                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1899                                  PIPE_CONFIG(ADDR_SURF_P2) |
1900                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1901                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1902                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1903                                  PIPE_CONFIG(ADDR_SURF_P2) |
1904                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1905                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1906
1907                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1908                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1909                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1910                                 NUM_BANKS(ADDR_SURF_8_BANK));
1911                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1912                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1913                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1914                                 NUM_BANKS(ADDR_SURF_8_BANK));
1915                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1916                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1917                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1918                                 NUM_BANKS(ADDR_SURF_8_BANK));
1919                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1920                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1921                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1922                                 NUM_BANKS(ADDR_SURF_8_BANK));
1923                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1924                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1925                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1926                                 NUM_BANKS(ADDR_SURF_8_BANK));
1927                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1928                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1929                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1930                                 NUM_BANKS(ADDR_SURF_8_BANK));
1931                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1932                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1933                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1934                                 NUM_BANKS(ADDR_SURF_8_BANK));
1935                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1936                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1937                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1938                                 NUM_BANKS(ADDR_SURF_16_BANK));
1939                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1940                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1941                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1942                                 NUM_BANKS(ADDR_SURF_16_BANK));
1943                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1944                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1945                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1946                                  NUM_BANKS(ADDR_SURF_16_BANK));
1947                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1948                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1949                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1950                                  NUM_BANKS(ADDR_SURF_16_BANK));
1951                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1952                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1953                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1954                                  NUM_BANKS(ADDR_SURF_16_BANK));
1955                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1956                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1957                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1958                                  NUM_BANKS(ADDR_SURF_16_BANK));
1959                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1960                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1961                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1962                                  NUM_BANKS(ADDR_SURF_8_BANK));
1963
1964                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1965                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1966                             reg_offset != 23)
1967                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1968
1969                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1970                         if (reg_offset != 7)
1971                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1972
1973                 break;
1974         case CHIP_FIJI:
1975                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1976                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1977                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1978                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1979                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1980                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1981                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1982                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1983                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1984                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1985                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1986                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1987                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1988                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1989                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1990                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1991                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1992                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1993                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1994                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1995                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1996                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1997                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1998                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1999                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2000                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2001                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2002                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2003                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2004                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2005                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2006                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2007                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2008                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2009                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2010                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2011                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2012                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2013                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2014                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2015                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2016                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2017                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2018                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2019                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2020                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2021                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2022                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2023                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2024                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2025                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2026                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2027                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2028                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2029                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2030                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2031                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2032                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2033                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2034                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2035                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2036                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2037                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2038                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2039                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2040                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2041                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2042                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2043                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2044                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2045                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2046                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2047                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2048                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2049                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2050                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2051                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2052                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2053                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2054                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2055                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2056                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2057                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2058                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2059                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2060                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2061                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2062                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2063                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2064                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2065                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2066                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2067                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2068                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2069                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2070                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2071                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2072                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2073                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2074                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2075                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2076                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2077                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2078                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2079                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2080                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2081                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2082                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2083                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2084                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2085                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2086                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2087                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2088                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2089                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2090                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2091                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2092                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2093                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2094                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2095                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2096                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2097
2098                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2099                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2100                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2101                                 NUM_BANKS(ADDR_SURF_8_BANK));
2102                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2103                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2104                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2105                                 NUM_BANKS(ADDR_SURF_8_BANK));
2106                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2107                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2108                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2109                                 NUM_BANKS(ADDR_SURF_8_BANK));
2110                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2111                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2112                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2113                                 NUM_BANKS(ADDR_SURF_8_BANK));
2114                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2115                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2116                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2117                                 NUM_BANKS(ADDR_SURF_8_BANK));
2118                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2119                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2120                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2121                                 NUM_BANKS(ADDR_SURF_8_BANK));
2122                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2123                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2124                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2125                                 NUM_BANKS(ADDR_SURF_8_BANK));
2126                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2127                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2128                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2129                                 NUM_BANKS(ADDR_SURF_8_BANK));
2130                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2131                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2132                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2133                                 NUM_BANKS(ADDR_SURF_8_BANK));
2134                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2135                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2136                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2137                                  NUM_BANKS(ADDR_SURF_8_BANK));
2138                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2139                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2140                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2141                                  NUM_BANKS(ADDR_SURF_8_BANK));
2142                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2143                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2144                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2145                                  NUM_BANKS(ADDR_SURF_8_BANK));
2146                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2147                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2148                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2149                                  NUM_BANKS(ADDR_SURF_8_BANK));
2150                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2151                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2152                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2153                                  NUM_BANKS(ADDR_SURF_4_BANK));
2154
2155                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2156                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2157
2158                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2159                         if (reg_offset != 7)
2160                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2161
2162                 break;
2163         case CHIP_TONGA:
2164                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2166                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2167                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2168                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2169                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2170                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2171                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2172                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2173                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2174                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2175                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2176                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2177                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2178                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2179                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2180                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2181                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2182                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2183                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2184                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2185                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2186                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2187                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2188                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2189                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2190                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2191                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2192                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2193                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2194                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2195                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2196                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2197                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2198                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2199                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2200                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2201                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2202                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2203                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2204                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2205                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2206                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2207                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2208                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2209                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2210                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2211                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2212                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2213                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2214                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2215                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2216                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2217                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2218                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2222                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2223                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2224                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2225                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2226                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2227                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2228                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2229                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2230                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2231                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2232                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2233                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2234                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2235                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2236                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2237                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2238                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2239                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2240                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2241                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2242                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2243                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2244                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2245                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2246                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2247                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2248                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2249                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2250                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2251                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2252                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2253                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2254                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2255                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2256                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2257                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2258                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2259                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2260                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2261                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2262                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2263                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2264                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2265                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2266                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2267                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2268                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2269                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2270                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2271                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2274                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2275                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2276                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2277                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2280                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2281                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2282                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2283                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2284                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2285                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2286
2287                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2288                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2289                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290                                 NUM_BANKS(ADDR_SURF_16_BANK));
2291                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2293                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294                                 NUM_BANKS(ADDR_SURF_16_BANK));
2295                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2296                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2297                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2298                                 NUM_BANKS(ADDR_SURF_16_BANK));
2299                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2300                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2301                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2302                                 NUM_BANKS(ADDR_SURF_16_BANK));
2303                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2305                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2306                                 NUM_BANKS(ADDR_SURF_16_BANK));
2307                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2308                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2309                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2310                                 NUM_BANKS(ADDR_SURF_16_BANK));
2311                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2312                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2313                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2314                                 NUM_BANKS(ADDR_SURF_16_BANK));
2315                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2316                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2317                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2318                                 NUM_BANKS(ADDR_SURF_16_BANK));
2319                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2320                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2321                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2322                                 NUM_BANKS(ADDR_SURF_16_BANK));
2323                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2324                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2325                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2326                                  NUM_BANKS(ADDR_SURF_16_BANK));
2327                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2329                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2330                                  NUM_BANKS(ADDR_SURF_16_BANK));
2331                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2333                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2334                                  NUM_BANKS(ADDR_SURF_8_BANK));
2335                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2337                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2338                                  NUM_BANKS(ADDR_SURF_4_BANK));
2339                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2341                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2342                                  NUM_BANKS(ADDR_SURF_4_BANK));
2343
2344                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2345                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2346
2347                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2348                         if (reg_offset != 7)
2349                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2350
2351                 break;
2352         case CHIP_BAFFIN:
2353                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2354                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2355                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2356                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2357                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2359                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2360                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2361                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2363                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2364                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2365                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2366                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2367                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2368                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2369                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2371                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2372                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2373                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2374                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2375                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2376                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2377                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2378                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2379                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2380                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2381                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2382                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2383                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2384                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2385                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2386                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2387                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2388                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2389                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2390                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2391                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2392                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2393                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2394                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2395                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2396                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2397                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2398                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2399                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2400                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2401                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2402                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2403                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2405                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2406                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2407                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2408                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2409                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2410                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2411                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2412                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2413                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2414                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2416                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2417                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2419                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2420                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2421                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2422                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2423                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2424                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2425                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2426                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2427                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2428                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2429                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2430                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2431                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2432                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2433                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2434                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2435                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2436                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2437                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2438                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2439                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2440                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2442                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2443                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2444                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2445                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2446                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2447                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2448                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2449                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2450                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2451                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2452                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2454                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2455                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2456                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2457                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2458                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2459                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2460                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2461                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2462                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2465                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2466                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2467                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2468                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2469                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2470                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2471                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2472                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2473                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2474                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2475
2476                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2478                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2479                                 NUM_BANKS(ADDR_SURF_16_BANK));
2480
2481                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2483                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2484                                 NUM_BANKS(ADDR_SURF_16_BANK));
2485
2486                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2488                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2489                                 NUM_BANKS(ADDR_SURF_16_BANK));
2490
2491                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2493                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2494                                 NUM_BANKS(ADDR_SURF_16_BANK));
2495
2496                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2499                                 NUM_BANKS(ADDR_SURF_16_BANK));
2500
2501                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2504                                 NUM_BANKS(ADDR_SURF_16_BANK));
2505
2506                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2508                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2509                                 NUM_BANKS(ADDR_SURF_16_BANK));
2510
2511                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2512                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2513                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2514                                 NUM_BANKS(ADDR_SURF_16_BANK));
2515
2516                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2517                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2518                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2519                                 NUM_BANKS(ADDR_SURF_16_BANK));
2520
2521                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2523                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2524                                 NUM_BANKS(ADDR_SURF_16_BANK));
2525
2526                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2529                                 NUM_BANKS(ADDR_SURF_16_BANK));
2530
2531                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2533                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2534                                 NUM_BANKS(ADDR_SURF_16_BANK));
2535
2536                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2538                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2539                                 NUM_BANKS(ADDR_SURF_8_BANK));
2540
2541                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2542                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2543                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2544                                 NUM_BANKS(ADDR_SURF_4_BANK));
2545
2546                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2547                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2548
2549                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2550                         if (reg_offset != 7)
2551                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2552
2553                 break;
2554         case CHIP_ELLESMERE:
2555                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2556                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2557                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2558                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2559                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2560                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2561                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2562                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2563                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2566                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2567                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2568                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2570                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2571                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2573                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2574                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2575                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2576                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2578                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2579                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2580                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2582                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2583                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2585                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2586                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2587                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2588                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2589                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2590                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2592                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2593                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2596                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2597                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2598                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2600                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2601                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2602                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2603                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2604                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2605                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2606                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2608                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2609                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2612                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2613                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2614                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2616                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2617                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2618                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2620                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2621                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2622                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2623                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2624                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2625                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2626                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2627                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2628                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2629                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2630                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2632                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2633                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2634                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2635                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2636                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2637                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2638                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2639                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2640                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2641                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2642                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2644                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2645                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2646                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2647                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2648                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2649                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2650                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2651                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2652                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2653                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2654                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2656                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2657                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2658                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2659                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2660                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2661                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2662                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2663                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2664                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2665                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2667                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2668                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2669                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2670                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2672                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2673                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2674                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2677
2678                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2680                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2681                                 NUM_BANKS(ADDR_SURF_16_BANK));
2682
2683                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2684                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2685                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2686                                 NUM_BANKS(ADDR_SURF_16_BANK));
2687
2688                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2689                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2690                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2691                                 NUM_BANKS(ADDR_SURF_16_BANK));
2692
2693                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2694                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2695                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2696                                 NUM_BANKS(ADDR_SURF_16_BANK));
2697
2698                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2699                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2700                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2701                                 NUM_BANKS(ADDR_SURF_16_BANK));
2702
2703                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2706                                 NUM_BANKS(ADDR_SURF_16_BANK));
2707
2708                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2711                                 NUM_BANKS(ADDR_SURF_16_BANK));
2712
2713                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2714                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2715                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2716                                 NUM_BANKS(ADDR_SURF_16_BANK));
2717
2718                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2720                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2721                                 NUM_BANKS(ADDR_SURF_16_BANK));
2722
2723                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2725                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2726                                 NUM_BANKS(ADDR_SURF_16_BANK));
2727
2728                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2729                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2730                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2731                                 NUM_BANKS(ADDR_SURF_16_BANK));
2732
2733                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2735                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2736                                 NUM_BANKS(ADDR_SURF_8_BANK));
2737
2738                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2741                                 NUM_BANKS(ADDR_SURF_4_BANK));
2742
2743                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2745                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2746                                 NUM_BANKS(ADDR_SURF_4_BANK));
2747
2748                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2749                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2750
2751                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2752                         if (reg_offset != 7)
2753                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2754
2755                 break;
2756         case CHIP_STONEY:
2757                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2758                                 PIPE_CONFIG(ADDR_SURF_P2) |
2759                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2760                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2761                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762                                 PIPE_CONFIG(ADDR_SURF_P2) |
2763                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2764                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2765                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766                                 PIPE_CONFIG(ADDR_SURF_P2) |
2767                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2768                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2769                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770                                 PIPE_CONFIG(ADDR_SURF_P2) |
2771                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2772                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2773                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774                                 PIPE_CONFIG(ADDR_SURF_P2) |
2775                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2776                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2777                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2778                                 PIPE_CONFIG(ADDR_SURF_P2) |
2779                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2780                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2781                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2782                                 PIPE_CONFIG(ADDR_SURF_P2) |
2783                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2784                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2785                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2786                                 PIPE_CONFIG(ADDR_SURF_P2));
2787                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2788                                 PIPE_CONFIG(ADDR_SURF_P2) |
2789                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2790                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2791                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2792                                  PIPE_CONFIG(ADDR_SURF_P2) |
2793                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2796                                  PIPE_CONFIG(ADDR_SURF_P2) |
2797                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2798                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2799                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2800                                  PIPE_CONFIG(ADDR_SURF_P2) |
2801                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2802                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2803                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804                                  PIPE_CONFIG(ADDR_SURF_P2) |
2805                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2806                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2808                                  PIPE_CONFIG(ADDR_SURF_P2) |
2809                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2810                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2812                                  PIPE_CONFIG(ADDR_SURF_P2) |
2813                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2814                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2815                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2816                                  PIPE_CONFIG(ADDR_SURF_P2) |
2817                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2818                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2819                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2820                                  PIPE_CONFIG(ADDR_SURF_P2) |
2821                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2822                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2823                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2824                                  PIPE_CONFIG(ADDR_SURF_P2) |
2825                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2826                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2827                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2828                                  PIPE_CONFIG(ADDR_SURF_P2) |
2829                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2830                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2831                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2832                                  PIPE_CONFIG(ADDR_SURF_P2) |
2833                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2834                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2835                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2836                                  PIPE_CONFIG(ADDR_SURF_P2) |
2837                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2838                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2839                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2840                                  PIPE_CONFIG(ADDR_SURF_P2) |
2841                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2842                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2843                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2844                                  PIPE_CONFIG(ADDR_SURF_P2) |
2845                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2846                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2847                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2848                                  PIPE_CONFIG(ADDR_SURF_P2) |
2849                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2850                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2852                                  PIPE_CONFIG(ADDR_SURF_P2) |
2853                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2854                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2856                                  PIPE_CONFIG(ADDR_SURF_P2) |
2857                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2858                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2859
2860                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2861                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2862                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2863                                 NUM_BANKS(ADDR_SURF_8_BANK));
2864                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2866                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867                                 NUM_BANKS(ADDR_SURF_8_BANK));
2868                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2870                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2871                                 NUM_BANKS(ADDR_SURF_8_BANK));
2872                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2874                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2875                                 NUM_BANKS(ADDR_SURF_8_BANK));
2876                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879                                 NUM_BANKS(ADDR_SURF_8_BANK));
2880                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2883                                 NUM_BANKS(ADDR_SURF_8_BANK));
2884                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2887                                 NUM_BANKS(ADDR_SURF_8_BANK));
2888                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2889                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2890                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2891                                 NUM_BANKS(ADDR_SURF_16_BANK));
2892                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2893                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2894                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2895                                 NUM_BANKS(ADDR_SURF_16_BANK));
2896                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2897                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2898                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2899                                  NUM_BANKS(ADDR_SURF_16_BANK));
2900                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2901                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2902                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2903                                  NUM_BANKS(ADDR_SURF_16_BANK));
2904                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2905                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2906                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2907                                  NUM_BANKS(ADDR_SURF_16_BANK));
2908                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2909                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2910                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2911                                  NUM_BANKS(ADDR_SURF_16_BANK));
2912                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2913                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2914                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2915                                  NUM_BANKS(ADDR_SURF_8_BANK));
2916
2917                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2918                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2919                             reg_offset != 23)
2920                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2921
2922                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2923                         if (reg_offset != 7)
2924                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2925
2926                 break;
2927         default:
2928                 dev_warn(adev->dev,
2929                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2930                          adev->asic_type);
2931
2932         case CHIP_CARRIZO:
2933                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2934                                 PIPE_CONFIG(ADDR_SURF_P2) |
2935                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2936                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2937                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2938                                 PIPE_CONFIG(ADDR_SURF_P2) |
2939                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2940                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2941                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942                                 PIPE_CONFIG(ADDR_SURF_P2) |
2943                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2944                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2945                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2946                                 PIPE_CONFIG(ADDR_SURF_P2) |
2947                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2948                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2949                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2950                                 PIPE_CONFIG(ADDR_SURF_P2) |
2951                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2952                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2953                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2954                                 PIPE_CONFIG(ADDR_SURF_P2) |
2955                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2956                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2957                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2958                                 PIPE_CONFIG(ADDR_SURF_P2) |
2959                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2960                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2961                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2962                                 PIPE_CONFIG(ADDR_SURF_P2));
2963                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2964                                 PIPE_CONFIG(ADDR_SURF_P2) |
2965                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2966                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2967                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2968                                  PIPE_CONFIG(ADDR_SURF_P2) |
2969                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2970                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2971                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2972                                  PIPE_CONFIG(ADDR_SURF_P2) |
2973                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2974                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2975                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2976                                  PIPE_CONFIG(ADDR_SURF_P2) |
2977                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2978                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2979                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2980                                  PIPE_CONFIG(ADDR_SURF_P2) |
2981                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2982                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2983                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2984                                  PIPE_CONFIG(ADDR_SURF_P2) |
2985                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2986                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2987                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2988                                  PIPE_CONFIG(ADDR_SURF_P2) |
2989                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2990                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2991                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2992                                  PIPE_CONFIG(ADDR_SURF_P2) |
2993                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2994                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2995                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2996                                  PIPE_CONFIG(ADDR_SURF_P2) |
2997                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2998                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2999                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3000                                  PIPE_CONFIG(ADDR_SURF_P2) |
3001                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3002                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3003                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3004                                  PIPE_CONFIG(ADDR_SURF_P2) |
3005                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3006                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3007                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3008                                  PIPE_CONFIG(ADDR_SURF_P2) |
3009                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3010                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3011                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3012                                  PIPE_CONFIG(ADDR_SURF_P2) |
3013                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3014                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3015                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3016                                  PIPE_CONFIG(ADDR_SURF_P2) |
3017                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3018                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3019                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3020                                  PIPE_CONFIG(ADDR_SURF_P2) |
3021                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3022                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3023                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3024                                  PIPE_CONFIG(ADDR_SURF_P2) |
3025                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3026                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3027                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3028                                  PIPE_CONFIG(ADDR_SURF_P2) |
3029                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3030                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3031                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3032                                  PIPE_CONFIG(ADDR_SURF_P2) |
3033                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3034                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3035
3036                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3038                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3039                                 NUM_BANKS(ADDR_SURF_8_BANK));
3040                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3042                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3043                                 NUM_BANKS(ADDR_SURF_8_BANK));
3044                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3046                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3047                                 NUM_BANKS(ADDR_SURF_8_BANK));
3048                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3049                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3050                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3051                                 NUM_BANKS(ADDR_SURF_8_BANK));
3052                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3053                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3054                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3055                                 NUM_BANKS(ADDR_SURF_8_BANK));
3056                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3058                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3059                                 NUM_BANKS(ADDR_SURF_8_BANK));
3060                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3063                                 NUM_BANKS(ADDR_SURF_8_BANK));
3064                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3065                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3066                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3067                                 NUM_BANKS(ADDR_SURF_16_BANK));
3068                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3069                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3070                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3071                                 NUM_BANKS(ADDR_SURF_16_BANK));
3072                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3073                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3074                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3075                                  NUM_BANKS(ADDR_SURF_16_BANK));
3076                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3077                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3078                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3079                                  NUM_BANKS(ADDR_SURF_16_BANK));
3080                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3081                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3082                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3083                                  NUM_BANKS(ADDR_SURF_16_BANK));
3084                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3085                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3086                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3087                                  NUM_BANKS(ADDR_SURF_16_BANK));
3088                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3090                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3091                                  NUM_BANKS(ADDR_SURF_8_BANK));
3092
3093                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3094                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3095                             reg_offset != 23)
3096                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3097
3098                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3099                         if (reg_offset != 7)
3100                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3101
3102                 break;
3103         }
3104 }
3105
3106 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
3107 {
3108         u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3109
3110         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3111                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3112                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3113         } else if (se_num == 0xffffffff) {
3114                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3115                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3116         } else if (sh_num == 0xffffffff) {
3117                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3118                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3119         } else {
3120                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3121                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3122         }
3123         WREG32(mmGRBM_GFX_INDEX, data);
3124 }
3125
/**
 * gfx_v8_0_create_bitmask - build a mask with the low @bit_width bits set
 *
 * @bit_width: number of low-order bits to set
 *
 * Returns a u32 whose lowest @bit_width bits are 1 and all others 0.
 * Any width of 32 or more yields 0xffffffff.
 */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
        /*
         * A shift count that reaches the width of the shifted type is
         * undefined in C.  Every width from 32 upwards means "all 32 bits"
         * for a u32 result, so saturate there instead of shifting.
         */
        if (bit_width >= 32)
                return 0xffffffff;

        return (1U << bit_width) - 1;
}
3130
3131 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3132 {
3133         u32 data, mask;
3134
3135         data = RREG32(mmCC_RB_BACKEND_DISABLE);
3136         data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3137
3138         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3139         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3140
3141         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3142                                        adev->gfx.config.max_sh_per_se);
3143
3144         return (~data) & mask;
3145 }
3146
3147 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3148 {
3149         int i, j;
3150         u32 data;
3151         u32 active_rbs = 0;
3152         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3153                                         adev->gfx.config.max_sh_per_se;
3154
3155         mutex_lock(&adev->grbm_idx_mutex);
3156         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3157                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3158                         gfx_v8_0_select_se_sh(adev, i, j);
3159                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3160                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3161                                                rb_bitmap_width_per_sh);
3162                 }
3163         }
3164         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3165         mutex_unlock(&adev->grbm_idx_mutex);
3166
3167         adev->gfx.config.backend_enable_mask = active_rbs;
3168         adev->gfx.config.num_rbs = hweight32(active_rbs);
3169 }
3170
3171 /**
3172  * gfx_v8_0_init_compute_vmid - gart enable
3173  *
3174  * @rdev: amdgpu_device pointer
3175  *
3176  * Initialize compute vmid sh_mem registers
3177  *
3178  */
3179 #define DEFAULT_SH_MEM_BASES    (0x6000)
3180 #define FIRST_COMPUTE_VMID      (8)
3181 #define LAST_COMPUTE_VMID       (16)
3182 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3183 {
3184         int i;
3185         uint32_t sh_mem_config;
3186         uint32_t sh_mem_bases;
3187
3188         /*
3189          * Configure apertures:
3190          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3191          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3192          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3193          */
3194         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3195
3196         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3197                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3198                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3199                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3200                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3201                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3202
3203         mutex_lock(&adev->srbm_mutex);
3204         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3205                 vi_srbm_select(adev, 0, 0, 0, i);
3206                 /* CP and shaders */
3207                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3208                 WREG32(mmSH_MEM_APE1_BASE, 1);
3209                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3210                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3211         }
3212         vi_srbm_select(adev, 0, 0, 0, 0);
3213         mutex_unlock(&adev->srbm_mutex);
3214 }
3215
3216 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3217 {
3218         u32 tmp;
3219         int i;
3220
3221         tmp = RREG32(mmGRBM_CNTL);
3222         tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3223         WREG32(mmGRBM_CNTL, tmp);
3224
3225         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3226         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3227         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3228
3229         gfx_v8_0_tiling_mode_table_init(adev);
3230
3231         gfx_v8_0_setup_rb(adev);
3232
3233         /* XXX SH_MEM regs */
3234         /* where to put LDS, scratch, GPUVM in FSA64 space */
3235         mutex_lock(&adev->srbm_mutex);
3236         for (i = 0; i < 16; i++) {
3237                 vi_srbm_select(adev, 0, 0, 0, i);
3238                 /* CP and shaders */
3239                 if (i == 0) {
3240                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3241                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3242                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3243                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3244                         WREG32(mmSH_MEM_CONFIG, tmp);
3245                 } else {
3246                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3247                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3248                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3249                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3250                         WREG32(mmSH_MEM_CONFIG, tmp);
3251                 }
3252
3253                 WREG32(mmSH_MEM_APE1_BASE, 1);
3254                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3255                 WREG32(mmSH_MEM_BASES, 0);
3256         }
3257         vi_srbm_select(adev, 0, 0, 0, 0);
3258         mutex_unlock(&adev->srbm_mutex);
3259
3260         gfx_v8_0_init_compute_vmid(adev);
3261
3262         mutex_lock(&adev->grbm_idx_mutex);
3263         /*
3264          * making sure that the following register writes will be broadcasted
3265          * to all the shaders
3266          */
3267         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3268
3269         WREG32(mmPA_SC_FIFO_SIZE,
3270                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3271                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3272                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3273                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3274                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3275                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3276                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3277                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3278         mutex_unlock(&adev->grbm_idx_mutex);
3279
3280 }
3281
3282 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3283 {
3284         u32 i, j, k;
3285         u32 mask;
3286
3287         mutex_lock(&adev->grbm_idx_mutex);
3288         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3289                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3290                         gfx_v8_0_select_se_sh(adev, i, j);
3291                         for (k = 0; k < adev->usec_timeout; k++) {
3292                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3293                                         break;
3294                                 udelay(1);
3295                         }
3296                 }
3297         }
3298         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3299         mutex_unlock(&adev->grbm_idx_mutex);
3300
3301         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3302                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3303                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3304                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3305         for (k = 0; k < adev->usec_timeout; k++) {
3306                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3307                         break;
3308                 udelay(1);
3309         }
3310 }
3311
3312 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3313                                                bool enable)
3314 {
3315         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3316
3317         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3318         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3319         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3320         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3321
3322         WREG32(mmCP_INT_CNTL_RING0, tmp);
3323 }
3324
3325 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3326 {
3327         u32 tmp = RREG32(mmRLC_CNTL);
3328
3329         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3330         WREG32(mmRLC_CNTL, tmp);
3331
3332         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3333
3334         gfx_v8_0_wait_for_rlc_serdes(adev);
3335 }
3336
3337 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3338 {
3339         u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3340
3341         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3342         WREG32(mmGRBM_SOFT_RESET, tmp);
3343         udelay(50);
3344         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3345         WREG32(mmGRBM_SOFT_RESET, tmp);
3346         udelay(50);
3347 }
3348
3349 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3350 {
3351         u32 tmp = RREG32(mmRLC_CNTL);
3352
3353         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3354         WREG32(mmRLC_CNTL, tmp);
3355
3356         /* carrizo do enable cp interrupt after cp inited */
3357         if (!(adev->flags & AMD_IS_APU))
3358                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3359
3360         udelay(50);
3361 }
3362
3363 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3364 {
3365         const struct rlc_firmware_header_v2_0 *hdr;
3366         const __le32 *fw_data;
3367         unsigned i, fw_size;
3368
3369         if (!adev->gfx.rlc_fw)
3370                 return -EINVAL;
3371
3372         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3373         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3374
3375         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3376                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3377         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3378
3379         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3380         for (i = 0; i < fw_size; i++)
3381                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3382         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3383
3384         return 0;
3385 }
3386
3387 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3388 {
3389         int r;
3390
3391         gfx_v8_0_rlc_stop(adev);
3392
3393         /* disable CG */
3394         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3395         if (adev->asic_type == CHIP_BAFFIN ||
3396                 adev->asic_type == CHIP_ELLESMERE)
3397                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3398
3399         /* disable PG */
3400         WREG32(mmRLC_PG_CNTL, 0);
3401
3402         gfx_v8_0_rlc_reset(adev);
3403
3404         if (!adev->pp_enabled) {
3405                 if (!adev->firmware.smu_load) {
3406                         /* legacy rlc firmware loading */
3407                         r = gfx_v8_0_rlc_load_microcode(adev);
3408                         if (r)
3409                                 return r;
3410                 } else {
3411                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3412                                                         AMDGPU_UCODE_ID_RLC_G);
3413                         if (r)
3414                                 return -EINVAL;
3415                 }
3416         }
3417
3418         gfx_v8_0_rlc_start(adev);
3419
3420         return 0;
3421 }
3422
3423 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3424 {
3425         int i;
3426         u32 tmp = RREG32(mmCP_ME_CNTL);
3427
3428         if (enable) {
3429                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
3430                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
3431                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
3432         } else {
3433                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
3434                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
3435                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
3436                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3437                         adev->gfx.gfx_ring[i].ready = false;
3438         }
3439         WREG32(mmCP_ME_CNTL, tmp);
3440         udelay(50);
3441 }
3442
3443 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3444 {
3445         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3446         const struct gfx_firmware_header_v1_0 *ce_hdr;
3447         const struct gfx_firmware_header_v1_0 *me_hdr;
3448         const __le32 *fw_data;
3449         unsigned i, fw_size;
3450
3451         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3452                 return -EINVAL;
3453
3454         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3455                 adev->gfx.pfp_fw->data;
3456         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3457                 adev->gfx.ce_fw->data;
3458         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3459                 adev->gfx.me_fw->data;
3460
3461         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3462         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3463         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3464
3465         gfx_v8_0_cp_gfx_enable(adev, false);
3466
3467         /* PFP */
3468         fw_data = (const __le32 *)
3469                 (adev->gfx.pfp_fw->data +
3470                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3471         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3472         WREG32(mmCP_PFP_UCODE_ADDR, 0);
3473         for (i = 0; i < fw_size; i++)
3474                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3475         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3476
3477         /* CE */
3478         fw_data = (const __le32 *)
3479                 (adev->gfx.ce_fw->data +
3480                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3481         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3482         WREG32(mmCP_CE_UCODE_ADDR, 0);
3483         for (i = 0; i < fw_size; i++)
3484                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3485         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3486
3487         /* ME */
3488         fw_data = (const __le32 *)
3489                 (adev->gfx.me_fw->data +
3490                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3491         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3492         WREG32(mmCP_ME_RAM_WADDR, 0);
3493         for (i = 0; i < fw_size; i++)
3494                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3495         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3496
3497         return 0;
3498 }
3499
3500 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3501 {
3502         u32 count = 0;
3503         const struct cs_section_def *sect = NULL;
3504         const struct cs_extent_def *ext = NULL;
3505
3506         /* begin clear state */
3507         count += 2;
3508         /* context control state */
3509         count += 3;
3510
3511         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3512                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3513                         if (sect->id == SECT_CONTEXT)
3514                                 count += 2 + ext->reg_count;
3515                         else
3516                                 return 0;
3517                 }
3518         }
3519         /* pa_sc_raster_config/pa_sc_raster_config1 */
3520         count += 4;
3521         /* end clear state */
3522         count += 2;
3523         /* clear state */
3524         count += 2;
3525
3526         return count;
3527 }
3528
3529 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3530 {
3531         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3532         const struct cs_section_def *sect = NULL;
3533         const struct cs_extent_def *ext = NULL;
3534         int r, i;
3535
3536         /* init the CP */
3537         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3538         WREG32(mmCP_ENDIAN_SWAP, 0);
3539         WREG32(mmCP_DEVICE_ID, 1);
3540
3541         gfx_v8_0_cp_gfx_enable(adev, true);
3542
3543         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
3544         if (r) {
3545                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3546                 return r;
3547         }
3548
3549         /* clear state buffer */
3550         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3551         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3552
3553         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3554         amdgpu_ring_write(ring, 0x80000000);
3555         amdgpu_ring_write(ring, 0x80000000);
3556
3557         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3558                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3559                         if (sect->id == SECT_CONTEXT) {
3560                                 amdgpu_ring_write(ring,
3561                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3562                                                ext->reg_count));
3563                                 amdgpu_ring_write(ring,
3564                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3565                                 for (i = 0; i < ext->reg_count; i++)
3566                                         amdgpu_ring_write(ring, ext->extent[i]);
3567                         }
3568                 }
3569         }
3570
3571         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3572         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3573         switch (adev->asic_type) {
3574         case CHIP_TONGA:
3575         case CHIP_ELLESMERE:
3576                 amdgpu_ring_write(ring, 0x16000012);
3577                 amdgpu_ring_write(ring, 0x0000002A);
3578                 break;
3579         case CHIP_BAFFIN:
3580                 amdgpu_ring_write(ring, 0x16000012);
3581                 amdgpu_ring_write(ring, 0x00000000);
3582                 break;
3583         case CHIP_FIJI:
3584                 amdgpu_ring_write(ring, 0x3a00161a);
3585                 amdgpu_ring_write(ring, 0x0000002e);
3586                 break;
3587         case CHIP_TOPAZ:
3588         case CHIP_CARRIZO:
3589                 amdgpu_ring_write(ring, 0x00000002);
3590                 amdgpu_ring_write(ring, 0x00000000);
3591                 break;
3592         case CHIP_STONEY:
3593                 amdgpu_ring_write(ring, 0x00000000);
3594                 amdgpu_ring_write(ring, 0x00000000);
3595                 break;
3596         default:
3597                 BUG();
3598         }
3599
3600         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3601         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3602
3603         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3604         amdgpu_ring_write(ring, 0);
3605
3606         /* init the CE partitions */
3607         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3608         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3609         amdgpu_ring_write(ring, 0x8000);
3610         amdgpu_ring_write(ring, 0x8000);
3611
3612         amdgpu_ring_commit(ring);
3613
3614         return 0;
3615 }
3616
3617 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
3618 {
3619         struct amdgpu_ring *ring;
3620         u32 tmp;
3621         u32 rb_bufsz;
3622         u64 rb_addr, rptr_addr;
3623         int r;
3624
3625         /* Set the write pointer delay */
3626         WREG32(mmCP_RB_WPTR_DELAY, 0);
3627
3628         /* set the RB to use vmid 0 */
3629         WREG32(mmCP_RB_VMID, 0);
3630
3631         /* Set ring buffer size */
3632         ring = &adev->gfx.gfx_ring[0];
3633         rb_bufsz = order_base_2(ring->ring_size / 8);
3634         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3635         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3636         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
3637         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
3638 #ifdef __BIG_ENDIAN
3639         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3640 #endif
3641         WREG32(mmCP_RB0_CNTL, tmp);
3642
3643         /* Initialize the ring buffer's read and write pointers */
3644         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
3645         ring->wptr = 0;
3646         WREG32(mmCP_RB0_WPTR, ring->wptr);
3647
3648         /* set the wb address wether it's enabled or not */
3649         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3650         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3651         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
3652
3653         mdelay(1);
3654         WREG32(mmCP_RB0_CNTL, tmp);
3655
3656         rb_addr = ring->gpu_addr >> 8;
3657         WREG32(mmCP_RB0_BASE, rb_addr);
3658         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3659
3660         /* no gfx doorbells on iceland */
3661         if (adev->asic_type != CHIP_TOPAZ) {
3662                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
3663                 if (ring->use_doorbell) {
3664                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3665                                             DOORBELL_OFFSET, ring->doorbell_index);
3666                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3667                                             DOORBELL_HIT, 0);
3668                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3669                                             DOORBELL_EN, 1);
3670                 } else {
3671                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3672                                             DOORBELL_EN, 0);
3673                 }
3674                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
3675
3676                 if (adev->asic_type == CHIP_TONGA) {
3677                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3678                                             DOORBELL_RANGE_LOWER,
3679                                             AMDGPU_DOORBELL_GFX_RING0);
3680                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3681
3682                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
3683                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3684                 }
3685
3686         }
3687
3688         /* start the ring */
3689         gfx_v8_0_cp_gfx_start(adev);
3690         ring->ready = true;
3691         r = amdgpu_ring_test_ring(ring);
3692         if (r) {
3693                 ring->ready = false;
3694                 return r;
3695         }
3696
3697         return 0;
3698 }
3699
3700 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3701 {
3702         int i;
3703
3704         if (enable) {
3705                 WREG32(mmCP_MEC_CNTL, 0);
3706         } else {
3707                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3708                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3709                         adev->gfx.compute_ring[i].ready = false;
3710         }
3711         udelay(50);
3712 }
3713
3714 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3715 {
3716         const struct gfx_firmware_header_v1_0 *mec_hdr;
3717         const __le32 *fw_data;
3718         unsigned i, fw_size;
3719
3720         if (!adev->gfx.mec_fw)
3721                 return -EINVAL;
3722
3723         gfx_v8_0_cp_compute_enable(adev, false);
3724
3725         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3726         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3727
3728         fw_data = (const __le32 *)
3729                 (adev->gfx.mec_fw->data +
3730                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3731         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3732
3733         /* MEC1 */
3734         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3735         for (i = 0; i < fw_size; i++)
3736                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3737         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3738
3739         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3740         if (adev->gfx.mec2_fw) {
3741                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
3742
3743                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3744                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3745
3746                 fw_data = (const __le32 *)
3747                         (adev->gfx.mec2_fw->data +
3748                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3749                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3750
3751                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3752                 for (i = 0; i < fw_size; i++)
3753                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3754                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
3755         }
3756
3757         return 0;
3758 }
3759
/*
 * struct vi_mqd - in-memory layout of an MQD for VI compute queues.
 *
 * The structure is 256 named/reserved dwords (the "ordinalN" comment on
 * each field is its zero-based dword index) followed by a further
 * 256-dword area reserved for the microcode, i.e. 512 dwords in total.
 * gfx_v8_0_cp_compute_resume() sizes the per-ring mqd_obj buffer object
 * with sizeof(struct vi_mqd), so fields must not be added, removed or
 * reordered.
 *
 * Rough grouping by ordinal:
 *   0        header
 *   1-38     compute_* dispatch state
 *   65-80    compute_user_data_0..15
 *   81-127   cp_mqd_*/gds_*/ctx_save_* bookkeeping fields
 *   128-181  cp_mqd_* / cp_hqd_* fields
 *   192-224  iqtimer packet
 *   228-235  set_resources packet
 * NOTE(review): MQD presumably stands for "memory queue descriptor" and the
 * cp_hqd_* fields presumably mirror the CP_HQD_* registers - confirm
 * against the hardware documentation.
 */
3760 struct vi_mqd {
3761         uint32_t header;  /* ordinal0 */
3762         uint32_t compute_dispatch_initiator;  /* ordinal1 */
3763         uint32_t compute_dim_x;  /* ordinal2 */
3764         uint32_t compute_dim_y;  /* ordinal3 */
3765         uint32_t compute_dim_z;  /* ordinal4 */
3766         uint32_t compute_start_x;  /* ordinal5 */
3767         uint32_t compute_start_y;  /* ordinal6 */
3768         uint32_t compute_start_z;  /* ordinal7 */
3769         uint32_t compute_num_thread_x;  /* ordinal8 */
3770         uint32_t compute_num_thread_y;  /* ordinal9 */
3771         uint32_t compute_num_thread_z;  /* ordinal10 */
3772         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
3773         uint32_t compute_perfcount_enable;  /* ordinal12 */
3774         uint32_t compute_pgm_lo;  /* ordinal13 */
3775         uint32_t compute_pgm_hi;  /* ordinal14 */
3776         uint32_t compute_tba_lo;  /* ordinal15 */
3777         uint32_t compute_tba_hi;  /* ordinal16 */
3778         uint32_t compute_tma_lo;  /* ordinal17 */
3779         uint32_t compute_tma_hi;  /* ordinal18 */
3780         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
3781         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
3782         uint32_t compute_vmid;  /* ordinal21 */
3783         uint32_t compute_resource_limits;  /* ordinal22 */
3784         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
3785         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
3786         uint32_t compute_tmpring_size;  /* ordinal25 */
3787         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
3788         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
3789         uint32_t compute_restart_x;  /* ordinal28 */
3790         uint32_t compute_restart_y;  /* ordinal29 */
3791         uint32_t compute_restart_z;  /* ordinal30 */
3792         uint32_t compute_thread_trace_enable;  /* ordinal31 */
3793         uint32_t compute_misc_reserved;  /* ordinal32 */
3794         uint32_t compute_dispatch_id;  /* ordinal33 */
3795         uint32_t compute_threadgroup_id;  /* ordinal34 */
3796         uint32_t compute_relaunch;  /* ordinal35 */
3797         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
3798         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
3799         uint32_t compute_wave_restore_control;  /* ordinal38 */
3800         uint32_t reserved9;  /* ordinal39 */
3801         uint32_t reserved10;  /* ordinal40 */
3802         uint32_t reserved11;  /* ordinal41 */
3803         uint32_t reserved12;  /* ordinal42 */
3804         uint32_t reserved13;  /* ordinal43 */
3805         uint32_t reserved14;  /* ordinal44 */
3806         uint32_t reserved15;  /* ordinal45 */
3807         uint32_t reserved16;  /* ordinal46 */
3808         uint32_t reserved17;  /* ordinal47 */
3809         uint32_t reserved18;  /* ordinal48 */
3810         uint32_t reserved19;  /* ordinal49 */
3811         uint32_t reserved20;  /* ordinal50 */
3812         uint32_t reserved21;  /* ordinal51 */
3813         uint32_t reserved22;  /* ordinal52 */
3814         uint32_t reserved23;  /* ordinal53 */
3815         uint32_t reserved24;  /* ordinal54 */
3816         uint32_t reserved25;  /* ordinal55 */
3817         uint32_t reserved26;  /* ordinal56 */
3818         uint32_t reserved27;  /* ordinal57 */
3819         uint32_t reserved28;  /* ordinal58 */
3820         uint32_t reserved29;  /* ordinal59 */
3821         uint32_t reserved30;  /* ordinal60 */
3822         uint32_t reserved31;  /* ordinal61 */
3823         uint32_t reserved32;  /* ordinal62 */
3824         uint32_t reserved33;  /* ordinal63 */
3825         uint32_t reserved34;  /* ordinal64 */
3826         uint32_t compute_user_data_0;  /* ordinal65 */
3827         uint32_t compute_user_data_1;  /* ordinal66 */
3828         uint32_t compute_user_data_2;  /* ordinal67 */
3829         uint32_t compute_user_data_3;  /* ordinal68 */
3830         uint32_t compute_user_data_4;  /* ordinal69 */
3831         uint32_t compute_user_data_5;  /* ordinal70 */
3832         uint32_t compute_user_data_6;  /* ordinal71 */
3833         uint32_t compute_user_data_7;  /* ordinal72 */
3834         uint32_t compute_user_data_8;  /* ordinal73 */
3835         uint32_t compute_user_data_9;  /* ordinal74 */
3836         uint32_t compute_user_data_10;  /* ordinal75 */
3837         uint32_t compute_user_data_11;  /* ordinal76 */
3838         uint32_t compute_user_data_12;  /* ordinal77 */
3839         uint32_t compute_user_data_13;  /* ordinal78 */
3840         uint32_t compute_user_data_14;  /* ordinal79 */
3841         uint32_t compute_user_data_15;  /* ordinal80 */
3842         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
3843         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
3844         uint32_t reserved35;  /* ordinal83 */
3845         uint32_t reserved36;  /* ordinal84 */
3846         uint32_t reserved37;  /* ordinal85 */
3847         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
3848         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
3849         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
3850         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
3851         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
3852         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
3853         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
3854         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
3855         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
3856         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
3857         uint32_t reserved38;  /* ordinal96 */
3858         uint32_t reserved39;  /* ordinal97 */
3859         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
3860         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
3861         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
3862         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
3863         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
3864         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
3865         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
3866         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
3867         uint32_t reserved40;  /* ordinal106 */
3868         uint32_t reserved41;  /* ordinal107 */
3869         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
3870         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
3871         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
3872         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
3873         uint32_t reserved42;  /* ordinal112 */
3874         uint32_t reserved43;  /* ordinal113 */
3875         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
3876         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
3877         uint32_t cp_packet_id_lo;  /* ordinal116 */
3878         uint32_t cp_packet_id_hi;  /* ordinal117 */
3879         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
3880         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
3881         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
3882         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
3883         uint32_t gds_save_mask_lo;  /* ordinal122 */
3884         uint32_t gds_save_mask_hi;  /* ordinal123 */
3885         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
3886         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
3887         uint32_t reserved44;  /* ordinal126 */
3888         uint32_t reserved45;  /* ordinal127 */
3889         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
3890         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
3891         uint32_t cp_hqd_active;  /* ordinal130 */
3892         uint32_t cp_hqd_vmid;  /* ordinal131 */
3893         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
3894         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
3895         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
3896         uint32_t cp_hqd_quantum;  /* ordinal135 */
3897         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
3898         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
3899         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
3900         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
3901         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
3902         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
3903         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
3904         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
3905         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
3906         uint32_t cp_hqd_pq_control;  /* ordinal145 */
3907         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
3908         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
3909         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
3910         uint32_t cp_hqd_ib_control;  /* ordinal149 */
3911         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
3912         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
3913         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
3914         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
3915         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
3916         uint32_t cp_hqd_msg_type;  /* ordinal155 */
3917         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
3918         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
3919         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
3920         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
3921         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
3922         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
3923         uint32_t cp_mqd_control;  /* ordinal162 */
3924         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
3925         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
3926         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
3927         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
3928         uint32_t cp_hqd_eop_control;  /* ordinal167 */
3929         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
3930         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
3931         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
3932         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
3933         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
3934         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
3935         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
3936         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
3937         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
3938         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
3939         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
3940         uint32_t cp_hqd_error;  /* ordinal179 */
3941         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
3942         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
3943         uint32_t reserved46;  /* ordinal182 */
3944         uint32_t reserved47;  /* ordinal183 */
3945         uint32_t reserved48;  /* ordinal184 */
3946         uint32_t reserved49;  /* ordinal185 */
3947         uint32_t reserved50;  /* ordinal186 */
3948         uint32_t reserved51;  /* ordinal187 */
3949         uint32_t reserved52;  /* ordinal188 */
3950         uint32_t reserved53;  /* ordinal189 */
3951         uint32_t reserved54;  /* ordinal190 */
3952         uint32_t reserved55;  /* ordinal191 */
3953         uint32_t iqtimer_pkt_header;  /* ordinal192 */
3954         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
3955         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
3956         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
3957         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
3958         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
3959         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
3960         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
3961         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
3962         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
3963         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
3964         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
3965         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
3966         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
3967         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
3968         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
3969         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
3970         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
3971         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
3972         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
3973         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
3974         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
3975         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
3976         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
3977         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
3978         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
3979         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
3980         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
3981         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
3982         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
3983         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
3984         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
3985         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
3986         uint32_t reserved56;  /* ordinal225 */
3987         uint32_t reserved57;  /* ordinal226 */
3988         uint32_t reserved58;  /* ordinal227 */
3989         uint32_t set_resources_header;  /* ordinal228 */
3990         uint32_t set_resources_dw1;  /* ordinal229 */
3991         uint32_t set_resources_dw2;  /* ordinal230 */
3992         uint32_t set_resources_dw3;  /* ordinal231 */
3993         uint32_t set_resources_dw4;  /* ordinal232 */
3994         uint32_t set_resources_dw5;  /* ordinal233 */
3995         uint32_t set_resources_dw6;  /* ordinal234 */
3996         uint32_t set_resources_dw7;  /* ordinal235 */
3997         uint32_t reserved59;  /* ordinal236 */
3998         uint32_t reserved60;  /* ordinal237 */
3999         uint32_t reserved61;  /* ordinal238 */
4000         uint32_t reserved62;  /* ordinal239 */
4001         uint32_t reserved63;  /* ordinal240 */
4002         uint32_t reserved64;  /* ordinal241 */
4003         uint32_t reserved65;  /* ordinal242 */
4004         uint32_t reserved66;  /* ordinal243 */
4005         uint32_t reserved67;  /* ordinal244 */
4006         uint32_t reserved68;  /* ordinal245 */
4007         uint32_t reserved69;  /* ordinal246 */
4008         uint32_t reserved70;  /* ordinal247 */
4009         uint32_t reserved71;  /* ordinal248 */
4010         uint32_t reserved72;  /* ordinal249 */
4011         uint32_t reserved73;  /* ordinal250 */
4012         uint32_t reserved74;  /* ordinal251 */
4013         uint32_t reserved75;  /* ordinal252 */
4014         uint32_t reserved76;  /* ordinal253 */
4015         uint32_t reserved77;  /* ordinal254 */
4016         uint32_t reserved78;  /* ordinal255 */
4017
4018         uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4019 };
4020
4021 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4022 {
4023         int i, r;
4024
4025         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4026                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4027
4028                 if (ring->mqd_obj) {
4029                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4030                         if (unlikely(r != 0))
4031                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4032
4033                         amdgpu_bo_unpin(ring->mqd_obj);
4034                         amdgpu_bo_unreserve(ring->mqd_obj);
4035
4036                         amdgpu_bo_unref(&ring->mqd_obj);
4037                         ring->mqd_obj = NULL;
4038                 }
4039         }
4040 }
4041
4042 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4043 {
4044         int r, i, j;
4045         u32 tmp;
4046         bool use_doorbell = true;
4047         u64 hqd_gpu_addr;
4048         u64 mqd_gpu_addr;
4049         u64 eop_gpu_addr;
4050         u64 wb_gpu_addr;
4051         u32 *buf;
4052         struct vi_mqd *mqd;
4053
4054         /* init the pipes */
4055         mutex_lock(&adev->srbm_mutex);
4056         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4057                 int me = (i < 4) ? 1 : 2;
4058                 int pipe = (i < 4) ? i : (i - 4);
4059
4060                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4061                 eop_gpu_addr >>= 8;
4062
4063                 vi_srbm_select(adev, me, pipe, 0, 0);
4064
4065                 /* write the EOP addr */
4066                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4067                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4068
4069                 /* set the VMID assigned */
4070                 WREG32(mmCP_HQD_VMID, 0);
4071
4072                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4073                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4074                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4075                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
4076                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4077         }
4078         vi_srbm_select(adev, 0, 0, 0, 0);
4079         mutex_unlock(&adev->srbm_mutex);
4080
4081         /* init the queues.  Just two for now. */
4082         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4083                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4084
4085                 if (ring->mqd_obj == NULL) {
4086                         r = amdgpu_bo_create(adev,
4087                                              sizeof(struct vi_mqd),
4088                                              PAGE_SIZE, true,
4089                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4090                                              NULL, &ring->mqd_obj);
4091                         if (r) {
4092                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4093                                 return r;
4094                         }
4095                 }
4096
4097                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4098                 if (unlikely(r != 0)) {
4099                         gfx_v8_0_cp_compute_fini(adev);
4100                         return r;
4101                 }
4102                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4103                                   &mqd_gpu_addr);
4104                 if (r) {
4105                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4106                         gfx_v8_0_cp_compute_fini(adev);
4107                         return r;
4108                 }
4109                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4110                 if (r) {
4111                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4112                         gfx_v8_0_cp_compute_fini(adev);
4113                         return r;
4114                 }
4115
4116                 /* init the mqd struct */
4117                 memset(buf, 0, sizeof(struct vi_mqd));
4118
4119                 mqd = (struct vi_mqd *)buf;
4120                 mqd->header = 0xC0310800;
4121                 mqd->compute_pipelinestat_enable = 0x00000001;
4122                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4123                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4124                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4125                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4126                 mqd->compute_misc_reserved = 0x00000003;
4127
4128                 mutex_lock(&adev->srbm_mutex);
4129                 vi_srbm_select(adev, ring->me,
4130                                ring->pipe,
4131                                ring->queue, 0);
4132
4133                 /* disable wptr polling */
4134                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4135                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4136                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4137
4138                 mqd->cp_hqd_eop_base_addr_lo =
4139                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
4140                 mqd->cp_hqd_eop_base_addr_hi =
4141                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4142
4143                 /* enable doorbell? */
4144                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4145                 if (use_doorbell) {
4146                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4147                 } else {
4148                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4149                 }
4150                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4151                 mqd->cp_hqd_pq_doorbell_control = tmp;
4152
4153                 /* disable the queue if it's active */
4154                 mqd->cp_hqd_dequeue_request = 0;
4155                 mqd->cp_hqd_pq_rptr = 0;
4156                 mqd->cp_hqd_pq_wptr= 0;
4157                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4158                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4159                         for (j = 0; j < adev->usec_timeout; j++) {
4160                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4161                                         break;
4162                                 udelay(1);
4163                         }
4164                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4165                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4166                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4167                 }
4168
4169                 /* set the pointer to the MQD */
4170                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4171                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4172                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4173                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4174
4175                 /* set MQD vmid to 0 */
4176                 tmp = RREG32(mmCP_MQD_CONTROL);
4177                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4178                 WREG32(mmCP_MQD_CONTROL, tmp);
4179                 mqd->cp_mqd_control = tmp;
4180
4181                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4182                 hqd_gpu_addr = ring->gpu_addr >> 8;
4183                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4184                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4185                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4186                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4187
4188                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4189                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4190                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4191                                     (order_base_2(ring->ring_size / 4) - 1));
4192                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4193                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4194 #ifdef __BIG_ENDIAN
4195                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4196 #endif
4197                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4198                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4199                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4200                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4201                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4202                 mqd->cp_hqd_pq_control = tmp;
4203
4204                 /* set the wb address wether it's enabled or not */
4205                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4206                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4207                 mqd->cp_hqd_pq_rptr_report_addr_hi =
4208                         upper_32_bits(wb_gpu_addr) & 0xffff;
4209                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4210                        mqd->cp_hqd_pq_rptr_report_addr_lo);
4211                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4212                        mqd->cp_hqd_pq_rptr_report_addr_hi);
4213
4214                 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4215                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4216                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4217                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4218                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4219                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4220                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
4221
4222                 /* enable the doorbell if requested */
4223                 if (use_doorbell) {
4224                         if ((adev->asic_type == CHIP_CARRIZO) ||
4225                             (adev->asic_type == CHIP_FIJI) ||
4226                             (adev->asic_type == CHIP_STONEY) ||
4227                             (adev->asic_type == CHIP_BAFFIN) ||
4228                             (adev->asic_type == CHIP_ELLESMERE)) {
4229                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4230                                        AMDGPU_DOORBELL_KIQ << 2);
4231                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4232                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
4233                         }
4234                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4235                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4236                                             DOORBELL_OFFSET, ring->doorbell_index);
4237                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4238                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4239                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4240                         mqd->cp_hqd_pq_doorbell_control = tmp;
4241
4242                 } else {
4243                         mqd->cp_hqd_pq_doorbell_control = 0;
4244                 }
4245                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4246                        mqd->cp_hqd_pq_doorbell_control);
4247
4248                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4249                 ring->wptr = 0;
4250                 mqd->cp_hqd_pq_wptr = ring->wptr;
4251                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4252                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4253
4254                 /* set the vmid for the queue */
4255                 mqd->cp_hqd_vmid = 0;
4256                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4257
4258                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4259                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4260                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4261                 mqd->cp_hqd_persistent_state = tmp;
4262                 if (adev->asic_type == CHIP_STONEY ||
4263                         adev->asic_type == CHIP_BAFFIN ||
4264                         adev->asic_type == CHIP_ELLESMERE) {
4265                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4266                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4267                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4268                 }
4269
4270                 /* activate the queue */
4271                 mqd->cp_hqd_active = 1;
4272                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4273
4274                 vi_srbm_select(adev, 0, 0, 0, 0);
4275                 mutex_unlock(&adev->srbm_mutex);
4276
4277                 amdgpu_bo_kunmap(ring->mqd_obj);
4278                 amdgpu_bo_unreserve(ring->mqd_obj);
4279         }
4280
4281         if (use_doorbell) {
4282                 tmp = RREG32(mmCP_PQ_STATUS);
4283                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4284                 WREG32(mmCP_PQ_STATUS, tmp);
4285         }
4286
4287         gfx_v8_0_cp_compute_enable(adev, true);
4288
4289         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4290                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4291
4292                 ring->ready = true;
4293                 r = amdgpu_ring_test_ring(ring);
4294                 if (r)
4295                         ring->ready = false;
4296         }
4297
4298         return 0;
4299 }
4300
4301 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4302 {
4303         int r;
4304
4305         if (!(adev->flags & AMD_IS_APU))
4306                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4307
4308         if (!adev->pp_enabled) {
4309                 if (!adev->firmware.smu_load) {
4310                         /* legacy firmware loading */
4311                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
4312                         if (r)
4313                                 return r;
4314
4315                         r = gfx_v8_0_cp_compute_load_microcode(adev);
4316                         if (r)
4317                                 return r;
4318                 } else {
4319                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4320                                                         AMDGPU_UCODE_ID_CP_CE);
4321                         if (r)
4322                                 return -EINVAL;
4323
4324                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4325                                                         AMDGPU_UCODE_ID_CP_PFP);
4326                         if (r)
4327                                 return -EINVAL;
4328
4329                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4330                                                         AMDGPU_UCODE_ID_CP_ME);
4331                         if (r)
4332                                 return -EINVAL;
4333
4334                         if (adev->asic_type == CHIP_TOPAZ) {
4335                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4336                                 if (r)
4337                                         return r;
4338                         } else {
4339                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4340                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
4341                                 if (r)
4342                                         return -EINVAL;
4343                         }
4344                 }
4345         }
4346
4347         r = gfx_v8_0_cp_gfx_resume(adev);
4348         if (r)
4349                 return r;
4350
4351         r = gfx_v8_0_cp_compute_resume(adev);
4352         if (r)
4353                 return r;
4354
4355         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4356
4357         return 0;
4358 }
4359
/**
 * gfx_v8_0_cp_enable - enable or disable both the gfx and the compute CP
 *
 * @adev: amdgpu_device pointer
 * @enable: passed through unchanged to the gfx and compute enable helpers
 *
 * Calls gfx_v8_0_cp_gfx_enable() followed by gfx_v8_0_cp_compute_enable().
 */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
        gfx_v8_0_cp_gfx_enable(adev, enable);
        gfx_v8_0_cp_compute_enable(adev, enable);
}
4365
/**
 * gfx_v8_0_hw_init - bring up the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Applies the golden register settings, runs the GPU init, then resumes
 * the RLC followed by the CP.
 *
 * Returns 0 on success or the error of the first resume step that failed.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	ret = gfx_v8_0_rlc_resume(adev);
	if (ret)
		return ret;

	/* the CP is only resumed once the RLC came up */
	return gfx_v8_0_cp_resume(adev);
}
4385
4386 static int gfx_v8_0_hw_fini(void *handle)
4387 {
4388         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4389
4390         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4391         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4392         gfx_v8_0_cp_enable(adev, false);
4393         gfx_v8_0_rlc_stop(adev);
4394         gfx_v8_0_cp_compute_fini(adev);
4395
4396         return 0;
4397 }
4398
/**
 * gfx_v8_0_suspend - suspend the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Suspend is implemented as a plain hardware teardown.
 * Returns the result of gfx_v8_0_hw_fini().
 */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4405
/**
 * gfx_v8_0_resume - resume the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Resume is implemented as a plain hardware init.
 * Returns the result of gfx_v8_0_hw_init().
 */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4412
4413 static bool gfx_v8_0_is_idle(void *handle)
4414 {
4415         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4416
4417         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4418                 return false;
4419         else
4420                 return true;
4421 }
4422
4423 static int gfx_v8_0_wait_for_idle(void *handle)
4424 {
4425         unsigned i;
4426         u32 tmp;
4427         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4428
4429         for (i = 0; i < adev->usec_timeout; i++) {
4430                 /* read MC_STATUS */
4431                 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4432
4433                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4434                         return 0;
4435                 udelay(1);
4436         }
4437         return -ETIMEDOUT;
4438 }
4439
4440 static void gfx_v8_0_print_status(void *handle)
4441 {
4442         int i;
4443         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4444
4445         dev_info(adev->dev, "GFX 8.x registers\n");
4446         dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
4447                  RREG32(mmGRBM_STATUS));
4448         dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
4449                  RREG32(mmGRBM_STATUS2));
4450         dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4451                  RREG32(mmGRBM_STATUS_SE0));
4452         dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4453                  RREG32(mmGRBM_STATUS_SE1));
4454         dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4455                  RREG32(mmGRBM_STATUS_SE2));
4456         dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4457                  RREG32(mmGRBM_STATUS_SE3));
4458         dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
4459         dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4460                  RREG32(mmCP_STALLED_STAT1));
4461         dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4462                  RREG32(mmCP_STALLED_STAT2));
4463         dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4464                  RREG32(mmCP_STALLED_STAT3));
4465         dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4466                  RREG32(mmCP_CPF_BUSY_STAT));
4467         dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4468                  RREG32(mmCP_CPF_STALLED_STAT1));
4469         dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
4470         dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
4471         dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4472                  RREG32(mmCP_CPC_STALLED_STAT1));
4473         dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
4474
4475         for (i = 0; i < 32; i++) {
4476                 dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
4477                          i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
4478         }
4479         for (i = 0; i < 16; i++) {
4480                 dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
4481                          i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
4482         }
4483         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
4484                 dev_info(adev->dev, "  se: %d\n", i);
4485                 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
4486                 dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
4487                          RREG32(mmPA_SC_RASTER_CONFIG));
4488                 dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
4489                          RREG32(mmPA_SC_RASTER_CONFIG_1));
4490         }
4491         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4492
4493         dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
4494                  RREG32(mmGB_ADDR_CONFIG));
4495         dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
4496                  RREG32(mmHDP_ADDR_CONFIG));
4497         dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
4498                  RREG32(mmDMIF_ADDR_CALC));
4499
4500         dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
4501                  RREG32(mmCP_MEQ_THRESHOLDS));
4502         dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
4503                  RREG32(mmSX_DEBUG_1));
4504         dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
4505                  RREG32(mmTA_CNTL_AUX));
4506         dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
4507                  RREG32(mmSPI_CONFIG_CNTL));
4508         dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
4509                  RREG32(mmSQ_CONFIG));
4510         dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
4511                  RREG32(mmDB_DEBUG));
4512         dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
4513                  RREG32(mmDB_DEBUG2));
4514         dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
4515                  RREG32(mmDB_DEBUG3));
4516         dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
4517                  RREG32(mmCB_HW_CONTROL));
4518         dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
4519                  RREG32(mmSPI_CONFIG_CNTL_1));
4520         dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
4521                  RREG32(mmPA_SC_FIFO_SIZE));
4522         dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
4523                  RREG32(mmVGT_NUM_INSTANCES));
4524         dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
4525                  RREG32(mmCP_PERFMON_CNTL));
4526         dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
4527                  RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
4528         dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
4529                  RREG32(mmVGT_CACHE_INVALIDATION));
4530         dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
4531                  RREG32(mmVGT_GS_VERTEX_REUSE));
4532         dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
4533                  RREG32(mmPA_SC_LINE_STIPPLE_STATE));
4534         dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
4535                  RREG32(mmPA_CL_ENHANCE));
4536         dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
4537                  RREG32(mmPA_SC_ENHANCE));
4538
4539         dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
4540                  RREG32(mmCP_ME_CNTL));
4541         dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
4542                  RREG32(mmCP_MAX_CONTEXT));
4543         dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
4544                  RREG32(mmCP_ENDIAN_SWAP));
4545         dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
4546                  RREG32(mmCP_DEVICE_ID));
4547
4548         dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
4549                  RREG32(mmCP_SEM_WAIT_TIMER));
4550
4551         dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
4552                  RREG32(mmCP_RB_WPTR_DELAY));
4553         dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
4554                  RREG32(mmCP_RB_VMID));
4555         dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4556                  RREG32(mmCP_RB0_CNTL));
4557         dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
4558                  RREG32(mmCP_RB0_WPTR));
4559         dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
4560                  RREG32(mmCP_RB0_RPTR_ADDR));
4561         dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4562                  RREG32(mmCP_RB0_RPTR_ADDR_HI));
4563         dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4564                  RREG32(mmCP_RB0_CNTL));
4565         dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
4566                  RREG32(mmCP_RB0_BASE));
4567         dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
4568                  RREG32(mmCP_RB0_BASE_HI));
4569         dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
4570                  RREG32(mmCP_MEC_CNTL));
4571         dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
4572                  RREG32(mmCP_CPF_DEBUG));
4573
4574         dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
4575                  RREG32(mmSCRATCH_ADDR));
4576         dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
4577                  RREG32(mmSCRATCH_UMSK));
4578
4579         dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
4580                  RREG32(mmCP_INT_CNTL_RING0));
4581         dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4582                  RREG32(mmRLC_LB_CNTL));
4583         dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
4584                  RREG32(mmRLC_CNTL));
4585         dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
4586                  RREG32(mmRLC_CGCG_CGLS_CTRL));
4587         dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
4588                  RREG32(mmRLC_LB_CNTR_INIT));
4589         dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
4590                  RREG32(mmRLC_LB_CNTR_MAX));
4591         dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
4592                  RREG32(mmRLC_LB_INIT_CU_MASK));
4593         dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
4594                  RREG32(mmRLC_LB_PARAMS));
4595         dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4596                  RREG32(mmRLC_LB_CNTL));
4597         dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
4598                  RREG32(mmRLC_MC_CNTL));
4599         dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
4600                  RREG32(mmRLC_UCODE_CNTL));
4601
4602         mutex_lock(&adev->srbm_mutex);
4603         for (i = 0; i < 16; i++) {
4604                 vi_srbm_select(adev, 0, 0, 0, i);
4605                 dev_info(adev->dev, "  VM %d:\n", i);
4606                 dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
4607                          RREG32(mmSH_MEM_CONFIG));
4608                 dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
4609                          RREG32(mmSH_MEM_APE1_BASE));
4610                 dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
4611                          RREG32(mmSH_MEM_APE1_LIMIT));
4612                 dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
4613                          RREG32(mmSH_MEM_BASES));
4614         }
4615         vi_srbm_select(adev, 0, 0, 0, 0);
4616         mutex_unlock(&adev->srbm_mutex);
4617 }
4618
4619 static int gfx_v8_0_soft_reset(void *handle)
4620 {
4621         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4622         u32 tmp;
4623         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4624
4625         /* GRBM_STATUS */
4626         tmp = RREG32(mmGRBM_STATUS);
4627         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4628                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4629                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4630                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4631                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4632                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4633                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4634                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4635                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4636                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4637         }
4638
4639         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4640                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4641                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4642                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4643                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4644         }
4645
4646         /* GRBM_STATUS2 */
4647         tmp = RREG32(mmGRBM_STATUS2);
4648         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4649                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4650                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4651
4652         /* SRBM_STATUS */
4653         tmp = RREG32(mmSRBM_STATUS);
4654         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4655                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4656                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4657
4658         if (grbm_soft_reset || srbm_soft_reset) {
4659                 gfx_v8_0_print_status((void *)adev);
4660                 /* stop the rlc */
4661                 gfx_v8_0_rlc_stop(adev);
4662
4663                 /* Disable GFX parsing/prefetching */
4664                 gfx_v8_0_cp_gfx_enable(adev, false);
4665
4666                 /* Disable MEC parsing/prefetching */
4667                 gfx_v8_0_cp_compute_enable(adev, false);
4668
4669                 if (grbm_soft_reset || srbm_soft_reset) {
4670                         tmp = RREG32(mmGMCON_DEBUG);
4671                         tmp = REG_SET_FIELD(tmp,
4672                                             GMCON_DEBUG, GFX_STALL, 1);
4673                         tmp = REG_SET_FIELD(tmp,
4674                                             GMCON_DEBUG, GFX_CLEAR, 1);
4675                         WREG32(mmGMCON_DEBUG, tmp);
4676
4677                         udelay(50);
4678                 }
4679
4680                 if (grbm_soft_reset) {
4681                         tmp = RREG32(mmGRBM_SOFT_RESET);
4682                         tmp |= grbm_soft_reset;
4683                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4684                         WREG32(mmGRBM_SOFT_RESET, tmp);
4685                         tmp = RREG32(mmGRBM_SOFT_RESET);
4686
4687                         udelay(50);
4688
4689                         tmp &= ~grbm_soft_reset;
4690                         WREG32(mmGRBM_SOFT_RESET, tmp);
4691                         tmp = RREG32(mmGRBM_SOFT_RESET);
4692                 }
4693
4694                 if (srbm_soft_reset) {
4695                         tmp = RREG32(mmSRBM_SOFT_RESET);
4696                         tmp |= srbm_soft_reset;
4697                         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4698                         WREG32(mmSRBM_SOFT_RESET, tmp);
4699                         tmp = RREG32(mmSRBM_SOFT_RESET);
4700
4701                         udelay(50);
4702
4703                         tmp &= ~srbm_soft_reset;
4704                         WREG32(mmSRBM_SOFT_RESET, tmp);
4705                         tmp = RREG32(mmSRBM_SOFT_RESET);
4706                 }
4707
4708                 if (grbm_soft_reset || srbm_soft_reset) {
4709                         tmp = RREG32(mmGMCON_DEBUG);
4710                         tmp = REG_SET_FIELD(tmp,
4711                                             GMCON_DEBUG, GFX_STALL, 0);
4712                         tmp = REG_SET_FIELD(tmp,
4713                                             GMCON_DEBUG, GFX_CLEAR, 0);
4714                         WREG32(mmGMCON_DEBUG, tmp);
4715                 }
4716
4717                 /* Wait a little for things to settle down */
4718                 udelay(50);
4719                 gfx_v8_0_print_status((void *)adev);
4720         }
4721         return 0;
4722 }
4723
4724 /**
4725  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4726  *
4727  * @adev: amdgpu_device pointer
4728  *
4729  * Fetches a GPU clock counter snapshot.
4730  * Returns the 64 bit clock counter snapshot.
4731  */
4732 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4733 {
4734         uint64_t clock;
4735
4736         mutex_lock(&adev->gfx.gpu_clock_mutex);
4737         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4738         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4739                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4740         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4741         return clock;
4742 }
4743
4744 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4745                                           uint32_t vmid,
4746                                           uint32_t gds_base, uint32_t gds_size,
4747                                           uint32_t gws_base, uint32_t gws_size,
4748                                           uint32_t oa_base, uint32_t oa_size)
4749 {
4750         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4751         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4752
4753         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4754         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4755
4756         oa_base = oa_base >> AMDGPU_OA_SHIFT;
4757         oa_size = oa_size >> AMDGPU_OA_SHIFT;
4758
4759         /* GDS Base */
4760         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4761         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4762                                 WRITE_DATA_DST_SEL(0)));
4763         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4764         amdgpu_ring_write(ring, 0);
4765         amdgpu_ring_write(ring, gds_base);
4766
4767         /* GDS Size */
4768         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4769         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4770                                 WRITE_DATA_DST_SEL(0)));
4771         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4772         amdgpu_ring_write(ring, 0);
4773         amdgpu_ring_write(ring, gds_size);
4774
4775         /* GWS */
4776         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4777         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4778                                 WRITE_DATA_DST_SEL(0)));
4779         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4780         amdgpu_ring_write(ring, 0);
4781         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4782
4783         /* OA */
4784         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4785         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4786                                 WRITE_DATA_DST_SEL(0)));
4787         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4788         amdgpu_ring_write(ring, 0);
4789         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4790 }
4791
4792 static int gfx_v8_0_early_init(void *handle)
4793 {
4794         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4795
4796         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
4797         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
4798         gfx_v8_0_set_ring_funcs(adev);
4799         gfx_v8_0_set_irq_funcs(adev);
4800         gfx_v8_0_set_gds_init(adev);
4801         gfx_v8_0_set_rlc_funcs(adev);
4802
4803         return 0;
4804 }
4805
4806 static int gfx_v8_0_late_init(void *handle)
4807 {
4808         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4809         int r;
4810
4811         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4812         if (r)
4813                 return r;
4814
4815         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4816         if (r)
4817                 return r;
4818
4819         /* requires IBs so do in late init after IB pool is initialized */
4820         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
4821         if (r)
4822                 return r;
4823
4824         return 0;
4825 }
4826
4827 static int gfx_v8_0_set_powergating_state(void *handle,
4828                                           enum amd_powergating_state state)
4829 {
4830         return 0;
4831 }
4832
4833 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
4834                                      uint32_t reg_addr, uint32_t cmd)
4835 {
4836         uint32_t data;
4837
4838         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4839
4840         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
4841         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
4842
4843         data = RREG32(mmRLC_SERDES_WR_CTRL);
4844         if (adev->asic_type == CHIP_STONEY)
4845                         data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
4846                         RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
4847                         RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
4848                         RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
4849                         RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
4850                         RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
4851                         RLC_SERDES_WR_CTRL__POWER_UP_MASK |
4852                         RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
4853                         RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
4854         else
4855                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
4856                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
4857                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
4858                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
4859                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
4860                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
4861                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
4862                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
4863                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
4864                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
4865                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
4866         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
4867                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
4868                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
4869                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
4870
4871         WREG32(mmRLC_SERDES_WR_CTRL, data);
4872 }
4873
/* Message codes placed in the MESSAGE field of RLC_GPR_REG2 */
#define MSG_EXIT_RLC_SAFE_MODE      0
#define MSG_ENTER_RLC_SAFE_MODE     1

/* RLC_GPR_REG2 fields: request flag in bit 0, message in bits 4:1 */
#define RLC_GPR_REG2__REQ_MASK           0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
#define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
4880
4881 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
4882 {
4883         u32 data = 0;
4884         unsigned i;
4885
4886         data = RREG32(mmRLC_CNTL);
4887         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
4888                 return;
4889
4890         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
4891             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
4892                                AMD_PG_SUPPORT_GFX_DMG))) {
4893                 data |= RLC_GPR_REG2__REQ_MASK;
4894                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
4895                 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
4896                 WREG32(mmRLC_GPR_REG2, data);
4897
4898                 for (i = 0; i < adev->usec_timeout; i++) {
4899                         if ((RREG32(mmRLC_GPM_STAT) &
4900                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
4901                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
4902                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
4903                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
4904                                 break;
4905                         udelay(1);
4906                 }
4907
4908                 for (i = 0; i < adev->usec_timeout; i++) {
4909                         if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
4910                                 break;
4911                         udelay(1);
4912                 }
4913                 adev->gfx.rlc.in_safe_mode = true;
4914         }
4915 }
4916
4917 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
4918 {
4919         u32 data;
4920         unsigned i;
4921
4922         data = RREG32(mmRLC_CNTL);
4923         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
4924                 return;
4925
4926         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
4927             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
4928                                AMD_PG_SUPPORT_GFX_DMG))) {
4929                 data |= RLC_GPR_REG2__REQ_MASK;
4930                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
4931                 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
4932                 WREG32(mmRLC_GPR_REG2, data);
4933                 adev->gfx.rlc.in_safe_mode = false;
4934         }
4935
4936         for (i = 0; i < adev->usec_timeout; i++) {
4937                 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
4938                         break;
4939                 udelay(1);
4940         }
4941 }
4942
4943 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
4944 {
4945         u32 data;
4946         unsigned i;
4947
4948         data = RREG32(mmRLC_CNTL);
4949         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
4950                 return;
4951
4952         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
4953                 data |= RLC_SAFE_MODE__CMD_MASK;
4954                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
4955                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4956                 WREG32(mmRLC_SAFE_MODE, data);
4957
4958                 for (i = 0; i < adev->usec_timeout; i++) {
4959                         if ((RREG32(mmRLC_GPM_STAT) &
4960                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
4961                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
4962                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
4963                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
4964                                 break;
4965                         udelay(1);
4966                 }
4967
4968                 for (i = 0; i < adev->usec_timeout; i++) {
4969                         if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
4970                                 break;
4971                         udelay(1);
4972                 }
4973                 adev->gfx.rlc.in_safe_mode = true;
4974         }
4975 }
4976
4977 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
4978 {
4979         u32 data = 0;
4980         unsigned i;
4981
4982         data = RREG32(mmRLC_CNTL);
4983         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
4984                 return;
4985
4986         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
4987                 if (adev->gfx.rlc.in_safe_mode) {
4988                         data |= RLC_SAFE_MODE__CMD_MASK;
4989                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
4990                         WREG32(mmRLC_SAFE_MODE, data);
4991                         adev->gfx.rlc.in_safe_mode = false;
4992                 }
4993         }
4994
4995         for (i = 0; i < adev->usec_timeout; i++) {
4996                 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
4997                         break;
4998                 udelay(1);
4999         }
5000 }
5001
5002 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5003 {
5004         adev->gfx.rlc.in_safe_mode = true;
5005 }
5006
5007 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5008 {
5009         adev->gfx.rlc.in_safe_mode = false;
5010 }
5011
5012 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5013         .enter_safe_mode = cz_enter_rlc_safe_mode,
5014         .exit_safe_mode = cz_exit_rlc_safe_mode
5015 };
5016
5017 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5018         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5019         .exit_safe_mode = iceland_exit_rlc_safe_mode
5020 };
5021
5022 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5023         .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5024         .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5025 };
5026
5027 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5028                                                       bool enable)
5029 {
5030         uint32_t temp, data;
5031
5032         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5033
5034         /* It is disabled by HW by default */
5035         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5036                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5037                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5038                                 /* 1 - RLC memory Light sleep */
5039                                 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
5040                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5041                                 if (temp != data)
5042                                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5043                         }
5044
5045                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5046                                 /* 2 - CP memory Light sleep */
5047                                 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
5048                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5049                                 if (temp != data)
5050                                         WREG32(mmCP_MEM_SLP_CNTL, data);
5051                         }
5052                 }
5053
5054                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5055                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5056                 if (adev->flags & AMD_IS_APU)
5057                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5058                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5059                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5060                 else
5061                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5062                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5063                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5064                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5065
5066                 if (temp != data)
5067                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5068
5069                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5070                 gfx_v8_0_wait_for_rlc_serdes(adev);
5071
5072                 /* 5 - clear mgcg override */
5073                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5074
5075                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5076                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5077                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5078                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5079                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5080                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5081                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5082                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5083                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5084                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5085                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5086                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5087                         if (temp != data)
5088                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5089                 }
5090                 udelay(50);
5091
5092                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5093                 gfx_v8_0_wait_for_rlc_serdes(adev);
5094         } else {
5095                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5096                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5097                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5098                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5099                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5100                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5101                 if (temp != data)
5102                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5103
5104                 /* 2 - disable MGLS in RLC */
5105                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5106                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5107                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5108                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5109                 }
5110
5111                 /* 3 - disable MGLS in CP */
5112                 data = RREG32(mmCP_MEM_SLP_CNTL);
5113                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5114                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5115                         WREG32(mmCP_MEM_SLP_CNTL, data);
5116                 }
5117
5118                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5119                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5120                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5121                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5122                 if (temp != data)
5123                         WREG32(mmCGTS_SM_CTRL_REG, data);
5124
5125                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5126                 gfx_v8_0_wait_for_rlc_serdes(adev);
5127
5128                 /* 6 - set mgcg override */
5129                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5130
5131                 udelay(50);
5132
5133                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5134                 gfx_v8_0_wait_for_rlc_serdes(adev);
5135         }
5136
5137         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5138 }
5139
5140 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5141                                                       bool enable)
5142 {
5143         uint32_t temp, temp1, data, data1;
5144
5145         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5146
5147         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5148
5149         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5150                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5151                  * Cmp_busy/GFX_Idle interrupts
5152                  */
5153                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5154
5155                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5156                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5157                 if (temp1 != data1)
5158                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5159
5160                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5161                 gfx_v8_0_wait_for_rlc_serdes(adev);
5162
5163                 /* 3 - clear cgcg override */
5164                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5165
5166                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5167                 gfx_v8_0_wait_for_rlc_serdes(adev);
5168
5169                 /* 4 - write cmd to set CGLS */
5170                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5171
5172                 /* 5 - enable cgcg */
5173                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5174
5175                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5176                         /* enable cgls*/
5177                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5178
5179                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5180                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5181
5182                         if (temp1 != data1)
5183                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5184                 } else {
5185                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5186                 }
5187
5188                 if (temp != data)
5189                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5190         } else {
5191                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5192                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5193
5194                 /* TEST CGCG */
5195                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5196                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5197                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5198                 if (temp1 != data1)
5199                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5200
5201                 /* read gfx register to wake up cgcg */
5202                 RREG32(mmCB_CGTT_SCLK_CTRL);
5203                 RREG32(mmCB_CGTT_SCLK_CTRL);
5204                 RREG32(mmCB_CGTT_SCLK_CTRL);
5205                 RREG32(mmCB_CGTT_SCLK_CTRL);
5206
5207                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5208                 gfx_v8_0_wait_for_rlc_serdes(adev);
5209
5210                 /* write cmd to Set CGCG Overrride */
5211                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5212
5213                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5214                 gfx_v8_0_wait_for_rlc_serdes(adev);
5215
5216                 /* write cmd to Clear CGLS */
5217                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5218
5219                 /* disable cgcg, cgls should be disabled too. */
5220                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5221                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5222                 if (temp != data)
5223                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5224         }
5225
5226         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5227 }
5228 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5229                                             bool enable)
5230 {
5231         if (enable) {
5232                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5233                  * ===  MGCG + MGLS + TS(CG/LS) ===
5234                  */
5235                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5236                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5237         } else {
5238                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5239                  * ===  CGCG + CGLS ===
5240                  */
5241                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5242                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5243         }
5244         return 0;
5245 }
5246
5247 static int gfx_v8_0_set_clockgating_state(void *handle,
5248                                           enum amd_clockgating_state state)
5249 {
5250         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5251
5252         switch (adev->asic_type) {
5253         case CHIP_FIJI:
5254         case CHIP_CARRIZO:
5255         case CHIP_STONEY:
5256                 gfx_v8_0_update_gfx_clock_gating(adev,
5257                                                  state == AMD_CG_STATE_GATE ? true : false);
5258                 break;
5259         default:
5260                 break;
5261         }
5262         return 0;
5263 }
5264
5265 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5266 {
5267         u32 rptr;
5268
5269         rptr = ring->adev->wb.wb[ring->rptr_offs];
5270
5271         return rptr;
5272 }
5273
5274 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5275 {
5276         struct amdgpu_device *adev = ring->adev;
5277         u32 wptr;
5278
5279         if (ring->use_doorbell)
5280                 /* XXX check if swapping is necessary on BE */
5281                 wptr = ring->adev->wb.wb[ring->wptr_offs];
5282         else
5283                 wptr = RREG32(mmCP_RB0_WPTR);
5284
5285         return wptr;
5286 }
5287
5288 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5289 {
5290         struct amdgpu_device *adev = ring->adev;
5291
5292         if (ring->use_doorbell) {
5293                 /* XXX check if swapping is necessary on BE */
5294                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5295                 WDOORBELL32(ring->doorbell_index, ring->wptr);
5296         } else {
5297                 WREG32(mmCP_RB0_WPTR, ring->wptr);
5298                 (void)RREG32(mmCP_RB0_WPTR);
5299         }
5300 }
5301
5302 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5303 {
5304         u32 ref_and_mask, reg_mem_engine;
5305
5306         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
5307                 switch (ring->me) {
5308                 case 1:
5309                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5310                         break;
5311                 case 2:
5312                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5313                         break;
5314                 default:
5315                         return;
5316                 }
5317                 reg_mem_engine = 0;
5318         } else {
5319                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5320                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5321         }
5322
5323         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5324         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5325                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
5326                                  reg_mem_engine));
5327         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5328         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5329         amdgpu_ring_write(ring, ref_and_mask);
5330         amdgpu_ring_write(ring, ref_and_mask);
5331         amdgpu_ring_write(ring, 0x20); /* poll interval */
5332 }
5333
5334 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
5335 {
5336         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5337         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5338                                  WRITE_DATA_DST_SEL(0) |
5339                                  WR_CONFIRM));
5340         amdgpu_ring_write(ring, mmHDP_DEBUG0);
5341         amdgpu_ring_write(ring, 0);
5342         amdgpu_ring_write(ring, 1);
5343
5344 }
5345
5346 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5347                                   struct amdgpu_ib *ib)
5348 {
5349         bool need_ctx_switch = ring->current_ctx != ib->ctx;
5350         u32 header, control = 0;
5351         u32 next_rptr = ring->wptr + 5;
5352
5353         /* drop the CE preamble IB for the same context */
5354         if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
5355                 return;
5356
5357         if (need_ctx_switch)
5358                 next_rptr += 2;
5359
5360         next_rptr += 4;
5361         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5362         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5363         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5364         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5365         amdgpu_ring_write(ring, next_rptr);
5366
5367         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
5368         if (need_ctx_switch) {
5369                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5370                 amdgpu_ring_write(ring, 0);
5371         }
5372
5373         if (ib->flags & AMDGPU_IB_FLAG_CE)
5374                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5375         else
5376                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5377
5378         control |= ib->length_dw | (ib->vm_id << 24);
5379
5380         amdgpu_ring_write(ring, header);
5381         amdgpu_ring_write(ring,
5382 #ifdef __BIG_ENDIAN
5383                           (2 << 0) |
5384 #endif
5385                           (ib->gpu_addr & 0xFFFFFFFC));
5386         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5387         amdgpu_ring_write(ring, control);
5388 }
5389
5390 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5391                                   struct amdgpu_ib *ib)
5392 {
5393         u32 header, control = 0;
5394         u32 next_rptr = ring->wptr + 5;
5395
5396         control |= INDIRECT_BUFFER_VALID;
5397
5398         next_rptr += 4;
5399         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5400         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5401         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5402         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5403         amdgpu_ring_write(ring, next_rptr);
5404
5405         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5406
5407         control |= ib->length_dw | (ib->vm_id << 24);
5408
5409         amdgpu_ring_write(ring, header);
5410         amdgpu_ring_write(ring,
5411 #ifdef __BIG_ENDIAN
5412                                           (2 << 0) |
5413 #endif
5414                                           (ib->gpu_addr & 0xFFFFFFFC));
5415         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5416         amdgpu_ring_write(ring, control);
5417 }
5418
5419 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
5420                                          u64 seq, unsigned flags)
5421 {
5422         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5423         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5424
5425         /* EVENT_WRITE_EOP - flush caches, send int */
5426         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
5427         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5428                                  EOP_TC_ACTION_EN |
5429                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5430                                  EVENT_INDEX(5)));
5431         amdgpu_ring_write(ring, addr & 0xfffffffc);
5432         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
5433                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5434         amdgpu_ring_write(ring, lower_32_bits(seq));
5435         amdgpu_ring_write(ring, upper_32_bits(seq));
5436
5437 }
5438
5439 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5440 {
5441         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5442         uint32_t seq = ring->fence_drv.sync_seq;
5443         uint64_t addr = ring->fence_drv.gpu_addr;
5444
5445         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5446         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
5447                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
5448                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
5449         amdgpu_ring_write(ring, addr & 0xfffffffc);
5450         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
5451         amdgpu_ring_write(ring, seq);
5452         amdgpu_ring_write(ring, 0xffffffff);
5453         amdgpu_ring_write(ring, 4); /* poll interval */
5454
5455         if (usepfp) {
5456                 /* synce CE with ME to prevent CE fetch CEIB before context switch done */
5457                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5458                 amdgpu_ring_write(ring, 0);
5459                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5460                 amdgpu_ring_write(ring, 0);
5461         }
5462 }
5463
5464 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5465                                         unsigned vm_id, uint64_t pd_addr)
5466 {
5467         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5468
5469         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5470         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5471                                  WRITE_DATA_DST_SEL(0)) |
5472                                  WR_CONFIRM);
5473         if (vm_id < 8) {
5474                 amdgpu_ring_write(ring,
5475                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
5476         } else {
5477                 amdgpu_ring_write(ring,
5478                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
5479         }
5480         amdgpu_ring_write(ring, 0);
5481         amdgpu_ring_write(ring, pd_addr >> 12);
5482
5483         /* bits 0-15 are the VM contexts0-15 */
5484         /* invalidate the cache */
5485         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5486         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5487                                  WRITE_DATA_DST_SEL(0)));
5488         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5489         amdgpu_ring_write(ring, 0);
5490         amdgpu_ring_write(ring, 1 << vm_id);
5491
5492         /* wait for the invalidate to complete */
5493         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5494         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5495                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
5496                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5497         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5498         amdgpu_ring_write(ring, 0);
5499         amdgpu_ring_write(ring, 0); /* ref */
5500         amdgpu_ring_write(ring, 0); /* mask */
5501         amdgpu_ring_write(ring, 0x20); /* poll interval */
5502
5503         /* compute doesn't have PFP */
5504         if (usepfp) {
5505                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5506                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5507                 amdgpu_ring_write(ring, 0x0);
5508                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5509                 amdgpu_ring_write(ring, 0);
5510                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5511                 amdgpu_ring_write(ring, 0);
5512         }
5513 }
5514
5515 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5516 {
5517         return ring->adev->wb.wb[ring->rptr_offs];
5518 }
5519
5520 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5521 {
5522         return ring->adev->wb.wb[ring->wptr_offs];
5523 }
5524
5525 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5526 {
5527         struct amdgpu_device *adev = ring->adev;
5528
5529         /* XXX check if swapping is necessary on BE */
5530         adev->wb.wb[ring->wptr_offs] = ring->wptr;
5531         WDOORBELL32(ring->doorbell_index, ring->wptr);
5532 }
5533
5534 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
5535                                              u64 addr, u64 seq,
5536                                              unsigned flags)
5537 {
5538         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5539         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5540
5541         /* RELEASE_MEM - flush caches, send int */
5542         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
5543         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5544                                  EOP_TC_ACTION_EN |
5545                                  EOP_TC_WB_ACTION_EN |
5546                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5547                                  EVENT_INDEX(5)));
5548         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5549         amdgpu_ring_write(ring, addr & 0xfffffffc);
5550         amdgpu_ring_write(ring, upper_32_bits(addr));
5551         amdgpu_ring_write(ring, lower_32_bits(seq));
5552         amdgpu_ring_write(ring, upper_32_bits(seq));
5553 }
5554
5555 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5556                                                  enum amdgpu_interrupt_state state)
5557 {
5558         u32 cp_int_cntl;
5559
5560         switch (state) {
5561         case AMDGPU_IRQ_STATE_DISABLE:
5562                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5563                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5564                                             TIME_STAMP_INT_ENABLE, 0);
5565                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5566                 break;
5567         case AMDGPU_IRQ_STATE_ENABLE:
5568                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5569                 cp_int_cntl =
5570                         REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5571                                       TIME_STAMP_INT_ENABLE, 1);
5572                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5573                 break;
5574         default:
5575                 break;
5576         }
5577 }
5578
5579 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5580                                                      int me, int pipe,
5581                                                      enum amdgpu_interrupt_state state)
5582 {
5583         u32 mec_int_cntl, mec_int_cntl_reg;
5584
5585         /*
5586          * amdgpu controls only pipe 0 of MEC1. That's why this function only
5587          * handles the setting of interrupts for this specific pipe. All other
5588          * pipes' interrupts are set by amdkfd.
5589          */
5590
5591         if (me == 1) {
5592                 switch (pipe) {
5593                 case 0:
5594                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
5595                         break;
5596                 default:
5597                         DRM_DEBUG("invalid pipe %d\n", pipe);
5598                         return;
5599                 }
5600         } else {
5601                 DRM_DEBUG("invalid me %d\n", me);
5602                 return;
5603         }
5604
5605         switch (state) {
5606         case AMDGPU_IRQ_STATE_DISABLE:
5607                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5608                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5609                                              TIME_STAMP_INT_ENABLE, 0);
5610                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5611                 break;
5612         case AMDGPU_IRQ_STATE_ENABLE:
5613                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5614                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5615                                              TIME_STAMP_INT_ENABLE, 1);
5616                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5617                 break;
5618         default:
5619                 break;
5620         }
5621 }
5622
5623 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5624                                              struct amdgpu_irq_src *source,
5625                                              unsigned type,
5626                                              enum amdgpu_interrupt_state state)
5627 {
5628         u32 cp_int_cntl;
5629
5630         switch (state) {
5631         case AMDGPU_IRQ_STATE_DISABLE:
5632                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5633                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5634                                             PRIV_REG_INT_ENABLE, 0);
5635                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5636                 break;
5637         case AMDGPU_IRQ_STATE_ENABLE:
5638                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5639                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5640                                             PRIV_REG_INT_ENABLE, 1);
5641                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5642                 break;
5643         default:
5644                 break;
5645         }
5646
5647         return 0;
5648 }
5649
5650 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5651                                               struct amdgpu_irq_src *source,
5652                                               unsigned type,
5653                                               enum amdgpu_interrupt_state state)
5654 {
5655         u32 cp_int_cntl;
5656
5657         switch (state) {
5658         case AMDGPU_IRQ_STATE_DISABLE:
5659                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5660                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5661                                             PRIV_INSTR_INT_ENABLE, 0);
5662                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5663                 break;
5664         case AMDGPU_IRQ_STATE_ENABLE:
5665                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5666                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5667                                             PRIV_INSTR_INT_ENABLE, 1);
5668                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5669                 break;
5670         default:
5671                 break;
5672         }
5673
5674         return 0;
5675 }
5676
5677 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5678                                             struct amdgpu_irq_src *src,
5679                                             unsigned type,
5680                                             enum amdgpu_interrupt_state state)
5681 {
5682         switch (type) {
5683         case AMDGPU_CP_IRQ_GFX_EOP:
5684                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
5685                 break;
5686         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5687                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5688                 break;
5689         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5690                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5691                 break;
5692         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5693                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5694                 break;
5695         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5696                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5697                 break;
5698         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5699                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5700                 break;
5701         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5702                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5703                 break;
5704         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5705                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5706                 break;
5707         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5708                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5709                 break;
5710         default:
5711                 break;
5712         }
5713         return 0;
5714 }
5715
5716 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
5717                             struct amdgpu_irq_src *source,
5718                             struct amdgpu_iv_entry *entry)
5719 {
5720         int i;
5721         u8 me_id, pipe_id, queue_id;
5722         struct amdgpu_ring *ring;
5723
5724         DRM_DEBUG("IH: CP EOP\n");
5725         me_id = (entry->ring_id & 0x0c) >> 2;
5726         pipe_id = (entry->ring_id & 0x03) >> 0;
5727         queue_id = (entry->ring_id & 0x70) >> 4;
5728
5729         switch (me_id) {
5730         case 0:
5731                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5732                 break;
5733         case 1:
5734         case 2:
5735                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5736                         ring = &adev->gfx.compute_ring[i];
5737                         /* Per-queue interrupt is supported for MEC starting from VI.
5738                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
5739                           */
5740                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5741                                 amdgpu_fence_process(ring);
5742                 }
5743                 break;
5744         }
5745         return 0;
5746 }
5747
5748 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
5749                                  struct amdgpu_irq_src *source,
5750                                  struct amdgpu_iv_entry *entry)
5751 {
5752         DRM_ERROR("Illegal register access in command stream\n");
5753         schedule_work(&adev->reset_work);
5754         return 0;
5755 }
5756
5757 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
5758                                   struct amdgpu_irq_src *source,
5759                                   struct amdgpu_iv_entry *entry)
5760 {
5761         DRM_ERROR("Illegal instruction in command stream\n");
5762         schedule_work(&adev->reset_work);
5763         return 0;
5764 }
5765
5766 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
5767         .early_init = gfx_v8_0_early_init,
5768         .late_init = gfx_v8_0_late_init,
5769         .sw_init = gfx_v8_0_sw_init,
5770         .sw_fini = gfx_v8_0_sw_fini,
5771         .hw_init = gfx_v8_0_hw_init,
5772         .hw_fini = gfx_v8_0_hw_fini,
5773         .suspend = gfx_v8_0_suspend,
5774         .resume = gfx_v8_0_resume,
5775         .is_idle = gfx_v8_0_is_idle,
5776         .wait_for_idle = gfx_v8_0_wait_for_idle,
5777         .soft_reset = gfx_v8_0_soft_reset,
5778         .print_status = gfx_v8_0_print_status,
5779         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
5780         .set_powergating_state = gfx_v8_0_set_powergating_state,
5781 };
5782
5783 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
5784         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
5785         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
5786         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
5787         .parse_cs = NULL,
5788         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
5789         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
5790         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
5791         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5792         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5793         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5794         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
5795         .test_ring = gfx_v8_0_ring_test_ring,
5796         .test_ib = gfx_v8_0_ring_test_ib,
5797         .insert_nop = amdgpu_ring_insert_nop,
5798         .pad_ib = amdgpu_ring_generic_pad_ib,
5799 };
5800
5801 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
5802         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
5803         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
5804         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
5805         .parse_cs = NULL,
5806         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
5807         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
5808         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
5809         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5810         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5811         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5812         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
5813         .test_ring = gfx_v8_0_ring_test_ring,
5814         .test_ib = gfx_v8_0_ring_test_ib,
5815         .insert_nop = amdgpu_ring_insert_nop,
5816         .pad_ib = amdgpu_ring_generic_pad_ib,
5817 };
5818
5819 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5820 {
5821         int i;
5822
5823         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5824                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5825
5826         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5827                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5828 }
5829
5830 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
5831         .set = gfx_v8_0_set_eop_interrupt_state,
5832         .process = gfx_v8_0_eop_irq,
5833 };
5834
5835 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
5836         .set = gfx_v8_0_set_priv_reg_fault_state,
5837         .process = gfx_v8_0_priv_reg_irq,
5838 };
5839
5840 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
5841         .set = gfx_v8_0_set_priv_inst_fault_state,
5842         .process = gfx_v8_0_priv_inst_irq,
5843 };
5844
5845 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
5846 {
5847         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5848         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
5849
5850         adev->gfx.priv_reg_irq.num_types = 1;
5851         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
5852
5853         adev->gfx.priv_inst_irq.num_types = 1;
5854         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
5855 }
5856
5857 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
5858 {
5859         switch (adev->asic_type) {
5860         case CHIP_TOPAZ:
5861         case CHIP_STONEY:
5862                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
5863                 break;
5864         case CHIP_CARRIZO:
5865                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
5866                 break;
5867         default:
5868                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
5869                 break;
5870         }
5871 }
5872
5873 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
5874 {
5875         /* init asci gds info */
5876         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5877         adev->gds.gws.total_size = 64;
5878         adev->gds.oa.total_size = 16;
5879
5880         if (adev->gds.mem.total_size == 64 * 1024) {
5881                 adev->gds.mem.gfx_partition_size = 4096;
5882                 adev->gds.mem.cs_partition_size = 4096;
5883
5884                 adev->gds.gws.gfx_partition_size = 4;
5885                 adev->gds.gws.cs_partition_size = 4;
5886
5887                 adev->gds.oa.gfx_partition_size = 4;
5888                 adev->gds.oa.cs_partition_size = 1;
5889         } else {
5890                 adev->gds.mem.gfx_partition_size = 1024;
5891                 adev->gds.mem.cs_partition_size = 1024;
5892
5893                 adev->gds.gws.gfx_partition_size = 16;
5894                 adev->gds.gws.cs_partition_size = 16;
5895
5896                 adev->gds.oa.gfx_partition_size = 4;
5897                 adev->gds.oa.cs_partition_size = 4;
5898         }
5899 }
5900
5901 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5902 {
5903         u32 data, mask;
5904
5905         data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5906         data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5907
5908         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5909         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5910
5911         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
5912
5913         return (~data) & mask;
5914 }
5915
5916 int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5917                          struct amdgpu_cu_info *cu_info)
5918 {
5919         int i, j, k, counter, active_cu_number = 0;
5920         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5921
5922         if (!adev || !cu_info)
5923                 return -EINVAL;
5924
5925         memset(cu_info, 0, sizeof(*cu_info));
5926
5927         mutex_lock(&adev->grbm_idx_mutex);
5928         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5929                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5930                         mask = 1;
5931                         ao_bitmap = 0;
5932                         counter = 0;
5933                         gfx_v8_0_select_se_sh(adev, i, j);
5934                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
5935                         cu_info->bitmap[i][j] = bitmap;
5936
5937                         for (k = 0; k < 16; k ++) {
5938                                 if (bitmap & mask) {
5939                                         if (counter < 2)
5940                                                 ao_bitmap |= mask;
5941                                         counter ++;
5942                                 }
5943                                 mask <<= 1;
5944                         }
5945                         active_cu_number += counter;
5946                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5947                 }
5948         }
5949         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5950         mutex_unlock(&adev->grbm_idx_mutex);
5951
5952         cu_info->number = active_cu_number;
5953         cu_info->ao_cu_mask = ao_cu_mask;
5954
5955         return 0;
5956 }