Merge tag 'tilcdc-4.10-fixes' of https://github.com/jsarha/linux into drm-fixes
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
54 #define GFX8_NUM_GFX_RINGS     1
55 #define GFX8_NUM_COMPUTE_RINGS 8
56
57 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
59 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
60 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
61
62 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
63 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
64 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
65 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
66 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
67 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
68 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
69 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
70 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
71
72 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
73 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
74 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
76 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
77 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
78
79 /* BPM SERDES CMD */
80 #define SET_BPM_SERDES_CMD    1
81 #define CLE_BPM_SERDES_CMD    0
82
83 /* BPM Register Address*/
/* Indices of the BPM (per-CU power management) registers that are poked
 * over SERDES with SET_BPM_SERDES_CMD / CLE_BPM_SERDES_CMD. */
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX            /* sentinel: number of BPM registers */
};
92
93 #define RLC_FormatDirectRegListLength        14
94
95 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
101
102 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
107
108 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
114
115 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
120
121 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
127
128 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
133 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
134
135 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
141
/*
 * Per-VMID GDS register offsets, one row per VMID (0-15).
 * Each row holds the {BASE, SIZE, GWS, OA} register offsets for that VMID;
 * keep the rows in VMID order so the table can be indexed by VMID.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
161
/*
 * "Golden" register fixups for Tonga rev A11.
 * Flat array of {register offset, mask, value} triples applied by
 * amdgpu_program_register_sequence(); the mask selects which bits of the
 * existing register contents are replaced by value.  Order matters.
 */
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
181
/*
 * Common golden settings for all Tonga revisions
 * ({register, mask, value} triples, see golden_settings_tonga_a11).
 */
static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, /* == TONGA_GB_ADDR_CONFIG_GOLDEN */
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
193
/*
 * Tonga medium-grain / coarse-grain clock-gating (MGCG/CGCG) init sequence
 * ({register, mask, value} triples).  NOTE: mmGRBM_GFX_INDEX is written a
 * second time mid-table before the per-CU CGTS entries — the ordering of
 * entries is significant, do not sort or deduplicate this table.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        /* re-select broadcast before programming the per-CU CGTS registers */
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
272
/*
 * Golden register fixups for Polaris11 rev A11
 * ({register, mask, value} triples, see golden_settings_tonga_a11).
 */
static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
293
/*
 * Common golden settings for all Polaris11 revisions
 * ({register, mask, value} triples).
 */
static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002, /* == POLARIS11_GB_ADDR_CONFIG_GOLDEN */
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
303
/*
 * Golden register fixups for Polaris10 rev A11
 * ({register, mask, value} triples, see golden_settings_tonga_a11).
 */
static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
324
/*
 * Common golden settings for all Polaris10 revisions
 * ({register, mask, value} triples).
 */
static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
336
/*
 * Common golden settings for all Fiji revisions
 * ({register, mask, value} triples).  GRBM_GFX_INDEX is intentionally
 * programmed twice; entry order is significant.
 */
static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
350
/*
 * Golden register fixups for Fiji rev A10
 * ({register, mask, value} triples, see golden_settings_tonga_a11).
 */
static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
365
/*
 * Fiji MGCG/CGCG clock-gating init sequence ({register, mask, value}
 * triples).  Unlike the Tonga/Iceland/CZ tables this one has no per-CU
 * CGTS entries.  Entry order is significant — do not sort.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
404
/*
 * Golden register fixups for Iceland/Topaz rev A11 ({register, mask,
 * value} triples).  Applied from gfx_v8_0_init_golden_registers() for
 * CHIP_TOPAZ via amdgpu_program_register_sequence().
 */
static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
424
/*
 * Common golden settings for all Iceland/Topaz revisions
 * ({register, mask, value} triples).
 */
static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, /* == TOPAZ_GB_ADDR_CONFIG_GOLDEN */
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
436
/*
 * Iceland/Topaz MGCG/CGCG clock-gating init sequence ({register, mask,
 * value} triples).  Covers six CUs (CU0-CU5); note CU0 and CU4 use the
 * TA_SQC variant register with a different value (0x0f840f87) than the
 * other CUs.  Entry order is significant — do not sort.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        /* re-select broadcast before programming the per-CU CGTS registers */
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
504
/*
 * Golden register fixups for Carrizo rev A11
 * ({register, mask, value} triples, see golden_settings_tonga_a11).
 */
static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
520
/*
 * Common golden settings for all Carrizo revisions
 * ({register, mask, value} triples).
 */
static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, /* == CARRIZO_GB_ADDR_CONFIG_GOLDEN */
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
532
/*
 * Carrizo MGCG/CGCG clock-gating init sequence ({register, mask, value}
 * triples).  Covers eight CUs (CU0-CU7).  Entry order is significant —
 * GRBM_GFX_INDEX is rewritten mid-table before the per-CU CGTS entries;
 * do not sort or deduplicate.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        /* re-select broadcast before programming the per-CU CGTS registers */
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
611
/*
 * Golden register fixups for Stoney rev A11
 * ({register, mask, value} triples, see golden_settings_tonga_a11).
 */
static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
625
/*
 * Common golden settings for all Stoney revisions
 * ({register, mask, value} triples).
 */
static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
637
/*
 * Stoney medium/coarse grain clockgating (MGCG/CGCG) init values.
 * { register, mask, value } triplets applied for CHIP_STONEY in
 * gfx_v8_0_init_golden_registers().
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
646
647 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
648 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
649 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
650 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
651 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
652 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
653
/*
 * gfx_v8_0_init_golden_registers - program the per-ASIC "golden" register
 * settings.
 *
 * For each supported VI-family ASIC, applies the corresponding clockgating
 * init table, golden settings table and common table (in that order) via
 * amdgpu_program_register_sequence().  The sequences are order-sensitive;
 * do not reorder the calls within a case.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
		/* Polaris has no separate mgcg/cgcg init table here */
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific quirk: on these rev 0xc7 Polaris10 boards
		 * (AMD/ASUS/XFX subsystem IDs) two extra I2C register writes
		 * are issued via atombios.  NOTE(review): presumably this
		 * tweaks an external voltage regulator -- confirm against
		 * board documentation before touching the magic values. */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
741
742 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
743 {
744         int i;
745
746         adev->gfx.scratch.num_reg = 7;
747         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
748         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
749                 adev->gfx.scratch.free[i] = true;
750                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
751         }
752 }
753
754 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
755 {
756         struct amdgpu_device *adev = ring->adev;
757         uint32_t scratch;
758         uint32_t tmp = 0;
759         unsigned i;
760         int r;
761
762         r = amdgpu_gfx_scratch_get(adev, &scratch);
763         if (r) {
764                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
765                 return r;
766         }
767         WREG32(scratch, 0xCAFEDEAD);
768         r = amdgpu_ring_alloc(ring, 3);
769         if (r) {
770                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
771                           ring->idx, r);
772                 amdgpu_gfx_scratch_free(adev, scratch);
773                 return r;
774         }
775         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
776         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
777         amdgpu_ring_write(ring, 0xDEADBEEF);
778         amdgpu_ring_commit(ring);
779
780         for (i = 0; i < adev->usec_timeout; i++) {
781                 tmp = RREG32(scratch);
782                 if (tmp == 0xDEADBEEF)
783                         break;
784                 DRM_UDELAY(1);
785         }
786         if (i < adev->usec_timeout) {
787                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
788                          ring->idx, i);
789         } else {
790                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
791                           ring->idx, scratch, tmp);
792                 r = -EINVAL;
793         }
794         amdgpu_gfx_scratch_free(adev, scratch);
795         return r;
796 }
797
798 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
799 {
800         struct amdgpu_device *adev = ring->adev;
801         struct amdgpu_ib ib;
802         struct dma_fence *f = NULL;
803         uint32_t scratch;
804         uint32_t tmp = 0;
805         long r;
806
807         r = amdgpu_gfx_scratch_get(adev, &scratch);
808         if (r) {
809                 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
810                 return r;
811         }
812         WREG32(scratch, 0xCAFEDEAD);
813         memset(&ib, 0, sizeof(ib));
814         r = amdgpu_ib_get(adev, NULL, 256, &ib);
815         if (r) {
816                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
817                 goto err1;
818         }
819         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
820         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
821         ib.ptr[2] = 0xDEADBEEF;
822         ib.length_dw = 3;
823
824         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
825         if (r)
826                 goto err2;
827
828         r = dma_fence_wait_timeout(f, false, timeout);
829         if (r == 0) {
830                 DRM_ERROR("amdgpu: IB test timed out.\n");
831                 r = -ETIMEDOUT;
832                 goto err2;
833         } else if (r < 0) {
834                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
835                 goto err2;
836         }
837         tmp = RREG32(scratch);
838         if (tmp == 0xDEADBEEF) {
839                 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
840                 r = 0;
841         } else {
842                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
843                           scratch, tmp);
844                 r = -EINVAL;
845         }
846 err2:
847         amdgpu_ib_free(adev, &ib, NULL);
848         dma_fence_put(f);
849 err1:
850         amdgpu_gfx_scratch_free(adev, scratch);
851         return r;
852 }
853
854
855 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
856         release_firmware(adev->gfx.pfp_fw);
857         adev->gfx.pfp_fw = NULL;
858         release_firmware(adev->gfx.me_fw);
859         adev->gfx.me_fw = NULL;
860         release_firmware(adev->gfx.ce_fw);
861         adev->gfx.ce_fw = NULL;
862         release_firmware(adev->gfx.rlc_fw);
863         adev->gfx.rlc_fw = NULL;
864         release_firmware(adev->gfx.mec_fw);
865         adev->gfx.mec_fw = NULL;
866         if ((adev->asic_type != CHIP_STONEY) &&
867             (adev->asic_type != CHIP_TOPAZ))
868                 release_firmware(adev->gfx.mec2_fw);
869         adev->gfx.mec2_fw = NULL;
870
871         kfree(adev->gfx.rlc.register_list_format);
872 }
873
874 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
875 {
876         const char *chip_name;
877         char fw_name[30];
878         int err;
879         struct amdgpu_firmware_info *info = NULL;
880         const struct common_firmware_header *header = NULL;
881         const struct gfx_firmware_header_v1_0 *cp_hdr;
882         const struct rlc_firmware_header_v2_0 *rlc_hdr;
883         unsigned int *tmp = NULL, i;
884
885         DRM_DEBUG("\n");
886
887         switch (adev->asic_type) {
888         case CHIP_TOPAZ:
889                 chip_name = "topaz";
890                 break;
891         case CHIP_TONGA:
892                 chip_name = "tonga";
893                 break;
894         case CHIP_CARRIZO:
895                 chip_name = "carrizo";
896                 break;
897         case CHIP_FIJI:
898                 chip_name = "fiji";
899                 break;
900         case CHIP_POLARIS11:
901                 chip_name = "polaris11";
902                 break;
903         case CHIP_POLARIS10:
904                 chip_name = "polaris10";
905                 break;
906         case CHIP_STONEY:
907                 chip_name = "stoney";
908                 break;
909         default:
910                 BUG();
911         }
912
913         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
914         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
915         if (err)
916                 goto out;
917         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
918         if (err)
919                 goto out;
920         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
921         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
922         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
923
924         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
925         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
926         if (err)
927                 goto out;
928         err = amdgpu_ucode_validate(adev->gfx.me_fw);
929         if (err)
930                 goto out;
931         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
932         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
933         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
934
935         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
936         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
937         if (err)
938                 goto out;
939         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
940         if (err)
941                 goto out;
942         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
943         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
944         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
945
946         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
947         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
948         if (err)
949                 goto out;
950         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
951         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
952         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
953         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
954
955         adev->gfx.rlc.save_and_restore_offset =
956                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
957         adev->gfx.rlc.clear_state_descriptor_offset =
958                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
959         adev->gfx.rlc.avail_scratch_ram_locations =
960                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
961         adev->gfx.rlc.reg_restore_list_size =
962                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
963         adev->gfx.rlc.reg_list_format_start =
964                         le32_to_cpu(rlc_hdr->reg_list_format_start);
965         adev->gfx.rlc.reg_list_format_separate_start =
966                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
967         adev->gfx.rlc.starting_offsets_start =
968                         le32_to_cpu(rlc_hdr->starting_offsets_start);
969         adev->gfx.rlc.reg_list_format_size_bytes =
970                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
971         adev->gfx.rlc.reg_list_size_bytes =
972                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
973
974         adev->gfx.rlc.register_list_format =
975                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
976                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
977
978         if (!adev->gfx.rlc.register_list_format) {
979                 err = -ENOMEM;
980                 goto out;
981         }
982
983         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
984                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
985         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
986                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
987
988         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
989
990         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
991                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
992         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
993                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
994
995         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
996         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
997         if (err)
998                 goto out;
999         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1000         if (err)
1001                 goto out;
1002         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1003         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1004         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1005
1006         if ((adev->asic_type != CHIP_STONEY) &&
1007             (adev->asic_type != CHIP_TOPAZ)) {
1008                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1009                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1010                 if (!err) {
1011                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1012                         if (err)
1013                                 goto out;
1014                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1015                                 adev->gfx.mec2_fw->data;
1016                         adev->gfx.mec2_fw_version =
1017                                 le32_to_cpu(cp_hdr->header.ucode_version);
1018                         adev->gfx.mec2_feature_version =
1019                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1020                 } else {
1021                         err = 0;
1022                         adev->gfx.mec2_fw = NULL;
1023                 }
1024         }
1025
1026         if (adev->firmware.smu_load) {
1027                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1028                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1029                 info->fw = adev->gfx.pfp_fw;
1030                 header = (const struct common_firmware_header *)info->fw->data;
1031                 adev->firmware.fw_size +=
1032                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1033
1034                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1035                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1036                 info->fw = adev->gfx.me_fw;
1037                 header = (const struct common_firmware_header *)info->fw->data;
1038                 adev->firmware.fw_size +=
1039                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1040
1041                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1042                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1043                 info->fw = adev->gfx.ce_fw;
1044                 header = (const struct common_firmware_header *)info->fw->data;
1045                 adev->firmware.fw_size +=
1046                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1047
1048                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1049                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1050                 info->fw = adev->gfx.rlc_fw;
1051                 header = (const struct common_firmware_header *)info->fw->data;
1052                 adev->firmware.fw_size +=
1053                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1054
1055                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1056                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1057                 info->fw = adev->gfx.mec_fw;
1058                 header = (const struct common_firmware_header *)info->fw->data;
1059                 adev->firmware.fw_size +=
1060                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1061
1062                 /* we need account JT in */
1063                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1064                 adev->firmware.fw_size +=
1065                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1066
1067                 if (amdgpu_sriov_vf(adev)) {
1068                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1069                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1070                         info->fw = adev->gfx.mec_fw;
1071                         adev->firmware.fw_size +=
1072                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1073                 }
1074
1075                 if (adev->gfx.mec2_fw) {
1076                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1077                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1078                         info->fw = adev->gfx.mec2_fw;
1079                         header = (const struct common_firmware_header *)info->fw->data;
1080                         adev->firmware.fw_size +=
1081                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1082                 }
1083
1084         }
1085
1086 out:
1087         if (err) {
1088                 dev_err(adev->dev,
1089                         "gfx8: Failed to load firmware \"%s\"\n",
1090                         fw_name);
1091                 release_firmware(adev->gfx.pfp_fw);
1092                 adev->gfx.pfp_fw = NULL;
1093                 release_firmware(adev->gfx.me_fw);
1094                 adev->gfx.me_fw = NULL;
1095                 release_firmware(adev->gfx.ce_fw);
1096                 adev->gfx.ce_fw = NULL;
1097                 release_firmware(adev->gfx.rlc_fw);
1098                 adev->gfx.rlc_fw = NULL;
1099                 release_firmware(adev->gfx.mec_fw);
1100                 adev->gfx.mec_fw = NULL;
1101                 release_firmware(adev->gfx.mec2_fw);
1102                 adev->gfx.mec2_fw = NULL;
1103         }
1104         return err;
1105 }
1106
1107 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1108                                     volatile u32 *buffer)
1109 {
1110         u32 count = 0, i;
1111         const struct cs_section_def *sect = NULL;
1112         const struct cs_extent_def *ext = NULL;
1113
1114         if (adev->gfx.rlc.cs_data == NULL)
1115                 return;
1116         if (buffer == NULL)
1117                 return;
1118
1119         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1120         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1121
1122         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1123         buffer[count++] = cpu_to_le32(0x80000000);
1124         buffer[count++] = cpu_to_le32(0x80000000);
1125
1126         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1127                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1128                         if (sect->id == SECT_CONTEXT) {
1129                                 buffer[count++] =
1130                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1131                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1132                                                 PACKET3_SET_CONTEXT_REG_START);
1133                                 for (i = 0; i < ext->reg_count; i++)
1134                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1135                         } else {
1136                                 return;
1137                         }
1138                 }
1139         }
1140
1141         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1142         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1143                         PACKET3_SET_CONTEXT_REG_START);
1144         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1145         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1146
1147         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1148         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1149
1150         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1151         buffer[count++] = cpu_to_le32(0);
1152 }
1153
1154 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1155 {
1156         const __le32 *fw_data;
1157         volatile u32 *dst_ptr;
1158         int me, i, max_me = 4;
1159         u32 bo_offset = 0;
1160         u32 table_offset, table_size;
1161
1162         if (adev->asic_type == CHIP_CARRIZO)
1163                 max_me = 5;
1164
1165         /* write the cp table buffer */
1166         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1167         for (me = 0; me < max_me; me++) {
1168                 if (me == 0) {
1169                         const struct gfx_firmware_header_v1_0 *hdr =
1170                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1171                         fw_data = (const __le32 *)
1172                                 (adev->gfx.ce_fw->data +
1173                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1174                         table_offset = le32_to_cpu(hdr->jt_offset);
1175                         table_size = le32_to_cpu(hdr->jt_size);
1176                 } else if (me == 1) {
1177                         const struct gfx_firmware_header_v1_0 *hdr =
1178                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1179                         fw_data = (const __le32 *)
1180                                 (adev->gfx.pfp_fw->data +
1181                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1182                         table_offset = le32_to_cpu(hdr->jt_offset);
1183                         table_size = le32_to_cpu(hdr->jt_size);
1184                 } else if (me == 2) {
1185                         const struct gfx_firmware_header_v1_0 *hdr =
1186                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1187                         fw_data = (const __le32 *)
1188                                 (adev->gfx.me_fw->data +
1189                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1190                         table_offset = le32_to_cpu(hdr->jt_offset);
1191                         table_size = le32_to_cpu(hdr->jt_size);
1192                 } else if (me == 3) {
1193                         const struct gfx_firmware_header_v1_0 *hdr =
1194                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1195                         fw_data = (const __le32 *)
1196                                 (adev->gfx.mec_fw->data +
1197                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1198                         table_offset = le32_to_cpu(hdr->jt_offset);
1199                         table_size = le32_to_cpu(hdr->jt_size);
1200                 } else  if (me == 4) {
1201                         const struct gfx_firmware_header_v1_0 *hdr =
1202                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1203                         fw_data = (const __le32 *)
1204                                 (adev->gfx.mec2_fw->data +
1205                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1206                         table_offset = le32_to_cpu(hdr->jt_offset);
1207                         table_size = le32_to_cpu(hdr->jt_size);
1208                 }
1209
1210                 for (i = 0; i < table_size; i ++) {
1211                         dst_ptr[bo_offset + i] =
1212                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1213                 }
1214
1215                 bo_offset += table_size;
1216         }
1217 }
1218
/*
 * gfx_v8_0_rlc_fini - tear down the RLC buffer objects.
 *
 * Frees the clear-state BO and, if present, the CP jump-table BO
 * (allocated in gfx_v8_0_rlc_init()).  Per-BO teardown order matters:
 * reserve, unpin, unreserve, then drop the last reference.  A failed
 * reserve is only warned about; teardown proceeds regardless.
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}
1245
/*
 * gfx_v8_0_rlc_init() - allocate and fill the RLC buffer objects.
 *
 * Creates the clear-state buffer (CSB) in CPU-visible contiguous VRAM and
 * writes the clear-state commands into it.  On Carrizo/Stoney it also
 * allocates the CP jump-table + GDS backup buffer and fills the jump table
 * via cz_init_cp_jump_table().
 *
 * Returns 0 on success or a negative errno.  CSB-path failures tear down
 * via gfx_v8_0_rlc_fini().
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
        volatile u32 *dst_ptr;
        u32 dws;
        const struct cs_section_def *cs_data;
        int r;

        adev->gfx.rlc.cs_data = vi_cs_data;

        cs_data = adev->gfx.rlc.cs_data;

        if (cs_data) {
                /* clear state block */
                adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

                if (adev->gfx.rlc.clear_state_obj == NULL) {
                        /* CPU-accessible, contiguous VRAM so the buffer can be
                         * written through the kmap below. */
                        r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
                                             AMDGPU_GEM_DOMAIN_VRAM,
                                             AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
                                             AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
                                             NULL, NULL,
                                             &adev->gfx.rlc.clear_state_obj);
                        if (r) {
                                /* NOTE(review): "RLC c bo" looks like a truncated
                                 * "RLC cbs bo" - message only, harmless. */
                                dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
                                gfx_v8_0_rlc_fini(adev);
                                return r;
                        }
                }
                /* reserve + pin the CSB, keep its GPU VA for later RLC setup */
                r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
                if (unlikely(r != 0)) {
                        gfx_v8_0_rlc_fini(adev);
                        return r;
                }
                r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
                                  &adev->gfx.rlc.clear_state_gpu_addr);
                if (r) {
                        amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
                        dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
                        gfx_v8_0_rlc_fini(adev);
                        return r;
                }

                r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
                if (r) {
                        dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
                        gfx_v8_0_rlc_fini(adev);
                        return r;
                }
                /* set up the cs buffer */
                dst_ptr = adev->gfx.rlc.cs_ptr;
                gfx_v8_0_get_csb_buffer(adev, dst_ptr);
                amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
                amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
        }

        if ((adev->asic_type == CHIP_CARRIZO) ||
            (adev->asic_type == CHIP_STONEY)) {
                /* jump table block */
                adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
                if (adev->gfx.rlc.cp_table_obj == NULL) {
                        r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
                                             AMDGPU_GEM_DOMAIN_VRAM,
                                             AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
                                             AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
                                             NULL, NULL,
                                             &adev->gfx.rlc.cp_table_obj);
                        if (r) {
                                dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
                                return r;
                        }
                }

                /* NOTE(review): unlike the CSB path above, failures from here
                 * on do not call gfx_v8_0_rlc_fini(); the clear-state BO is
                 * left for a later fini - confirm the sw_fini path runs. */
                r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
                if (unlikely(r != 0)) {
                        dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
                        return r;
                }
                r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
                                  &adev->gfx.rlc.cp_table_gpu_addr);
                if (r) {
                        amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
                        dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
                        return r;
                }
                r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
                if (r) {
                        dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
                        return r;
                }

                /* copy the CP microcode jump tables into the mapped buffer */
                cz_init_cp_jump_table(adev);

                amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
                amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
        }

        return 0;
}
1343
1344 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1345 {
1346         int r;
1347
1348         if (adev->gfx.mec.hpd_eop_obj) {
1349                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1350                 if (unlikely(r != 0))
1351                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1352                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1353                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1354                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1355                 adev->gfx.mec.hpd_eop_obj = NULL;
1356         }
1357 }
1358
1359 #define MEC_HPD_SIZE 2048
1360
1361 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1362 {
1363         int r;
1364         u32 *hpd;
1365
1366         /*
1367          * we assign only 1 pipe because all other pipes will
1368          * be handled by KFD
1369          */
1370         adev->gfx.mec.num_mec = 1;
1371         adev->gfx.mec.num_pipe = 1;
1372         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1373
1374         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1375                 r = amdgpu_bo_create(adev,
1376                                      adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1377                                      PAGE_SIZE, true,
1378                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1379                                      &adev->gfx.mec.hpd_eop_obj);
1380                 if (r) {
1381                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1382                         return r;
1383                 }
1384         }
1385
1386         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1387         if (unlikely(r != 0)) {
1388                 gfx_v8_0_mec_fini(adev);
1389                 return r;
1390         }
1391         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1392                           &adev->gfx.mec.hpd_eop_gpu_addr);
1393         if (r) {
1394                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1395                 gfx_v8_0_mec_fini(adev);
1396                 return r;
1397         }
1398         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1399         if (r) {
1400                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1401                 gfx_v8_0_mec_fini(adev);
1402                 return r;
1403         }
1404
1405         memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1406
1407         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1408         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1409
1410         return 0;
1411 }
1412
/* Pre-assembled GCN compute shader (raw ISA words) used by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize a block of VGPRs.
 * The trailing 0xbf8a0000/0xbf810000 pair terminates the program. */
static const u32 vgpr_init_compute_shader[] =
{
        0x7e000209, 0x7e020208,
        0x7e040207, 0x7e060206,
        0x7e080205, 0x7e0a0204,
        0x7e0c0203, 0x7e0e0202,
        0x7e100201, 0x7e120200,
        0x7e140209, 0x7e160208,
        0x7e180207, 0x7e1a0206,
        0x7e1c0205, 0x7e1e0204,
        0x7e200203, 0x7e220202,
        0x7e240201, 0x7e260200,
        0x7e280209, 0x7e2a0208,
        0x7e2c0207, 0x7e2e0206,
        0x7e300205, 0x7e320204,
        0x7e340203, 0x7e360202,
        0x7e380201, 0x7e3a0200,
        0x7e3c0209, 0x7e3e0208,
        0x7e400207, 0x7e420206,
        0x7e440205, 0x7e460204,
        0x7e480203, 0x7e4a0202,
        0x7e4c0201, 0x7e4e0200,
        0x7e500209, 0x7e520208,
        0x7e540207, 0x7e560206,
        0x7e580205, 0x7e5a0204,
        0x7e5c0203, 0x7e5e0202,
        0x7e600201, 0x7e620200,
        0x7e640209, 0x7e660208,
        0x7e680207, 0x7e6a0206,
        0x7e6c0205, 0x7e6e0204,
        0x7e700203, 0x7e720202,
        0x7e740201, 0x7e760200,
        0x7e780209, 0x7e7a0208,
        0x7e7c0207, 0x7e7e0206,
        0xbf8a0000, 0xbf810000,
};

/* Companion shader that initializes SGPRs; dispatched twice (once per
 * static-thread-management mask, see sgpr1/sgpr2_init_regs below). */
static const u32 sgpr_init_compute_shader[] =
{
        0xbe8a0100, 0xbe8c0102,
        0xbe8e0104, 0xbe900106,
        0xbe920108, 0xbe940100,
        0xbe960102, 0xbe980104,
        0xbe9a0106, 0xbe9c0108,
        0xbe9e0100, 0xbea00102,
        0xbea20104, 0xbea40106,
        0xbea60108, 0xbea80100,
        0xbeaa0102, 0xbeac0104,
        0xbeae0106, 0xbeb00108,
        0xbeb20100, 0xbeb40102,
        0xbeb60104, 0xbeb80106,
        0xbeba0108, 0xbebc0100,
        0xbebe0102, 0xbec00104,
        0xbec20106, 0xbec40108,
        0xbec60100, 0xbec80102,
        0xbee60004, 0xbee70005,
        0xbeea0006, 0xbeeb0007,
        0xbee80008, 0xbee90009,
        0xbefc0000, 0xbf8a0000,
        0xbf810000, 0x00000000,
};

/* Register/value pairs programmed via PACKET3_SET_SH_REG before the
 * VGPR-init dispatch (even index = register, odd index = value). */
static const u32 vgpr_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
        mmCOMPUTE_RESOURCE_LIMITS, 0,
        mmCOMPUTE_NUM_THREAD_X, 256*4,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/* Register/value pairs for the first SGPR-init dispatch.  Differs from
 * sgpr2_init_regs only in the SE0 static-thread-management mask (0x0f
 * here vs 0xf0) - presumably each dispatch covers half the CUs. */
static const u32 sgpr1_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/* Register/value pairs for the second SGPR-init dispatch (mask 0xf0). */
static const u32 sgpr2_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/* EDC SEC/DED counter registers; read back after the workaround
 * dispatches to clear the counters (see the loop at the end of
 * gfx_v8_0_do_edc_gpr_workarounds()). */
static const u32 sec_ded_counter_registers[] =
{
        mmCPC_EDC_ATC_CNT,
        mmCPC_EDC_SCRATCH_CNT,
        mmCPC_EDC_UCODE_CNT,
        mmCPF_EDC_ATC_CNT,
        mmCPF_EDC_ROQ_CNT,
        mmCPF_EDC_TAG_CNT,
        mmCPG_EDC_ATC_CNT,
        mmCPG_EDC_DMA_CNT,
        mmCPG_EDC_TAG_CNT,
        mmDC_EDC_CSINVOC_CNT,
        mmDC_EDC_RESTORE_CNT,
        mmDC_EDC_STATE_CNT,
        mmGDS_EDC_CNT,
        mmGDS_EDC_GRBM_CNT,
        mmGDS_EDC_OA_DED,
        mmSPI_EDC_CNT,
        mmSQC_ATC_EDC_GATCL1_CNT,
        mmSQC_EDC_CNT,
        mmSQ_EDC_DED_CNT,
        mmSQ_EDC_INFO,
        mmSQ_EDC_SEC_CNT,
        mmTCC_EDC_CNT,
        mmTCP_ATC_EDC_GATCL1_CNT,
        mmTCP_EDC_CNT,
        mmTD_EDC_CNT
};
1563
/*
 * gfx_v8_0_do_edc_gpr_workarounds() - Carrizo-only EDC GPR init workaround.
 *
 * Builds a single indirect buffer containing three compute dispatches
 * (one VGPR-init, two SGPR-init) that run the pre-assembled shaders
 * above, submits it on compute ring 0 and waits for completion, then
 * enables the EDC modes and clears the SEC/DED counters by reading
 * them back.
 *
 * Returns 0 on success (or when skipped), negative errno on failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
        int r, i;
        u32 tmp;
        unsigned total_size, vgpr_offset, sgpr_offset;
        u64 gpu_addr;

        /* only supported on CZ */
        if (adev->asic_type != CHIP_CARRIZO)
                return 0;

        /* bail if the compute ring is not ready */
        if (!ring->ready)
                return 0;

        /* save EDC mode and disable it while the init shaders run */
        tmp = RREG32(mmGB_EDC_MODE);
        WREG32(mmGB_EDC_MODE, 0);

        /* per dispatch: 3 dwords per reg/value pair + 4 (PGM_LO/HI write)
         * + 5 (dispatch packet) + 2 (CS partial flush), times 4 bytes */
        total_size =
                (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size +=
                (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size +=
                (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size = ALIGN(total_size, 256);
        /* the shaders live in the same IB, after the command stream */
        vgpr_offset = total_size;
        total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
        sgpr_offset = total_size;
        total_size += sizeof(sgpr_init_compute_shader);

        /* allocate an indirect buffer to put the commands in */
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, total_size, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
                return r;
        }

        /* load the compute shaders */
        for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
                ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

        for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
                ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

        /* init the ib length to 0 */
        ib.length_dw = 0;

        /* VGPR */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* SGPR1 */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* SGPR2 */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI
         * (same SGPR-init shader as above - only the thread mask differs) */
        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* schedule the ib on the ring */
        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
        if (r) {
                DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
                goto fail;
        }

        /* wait for the GPU to finish processing the IB */
        r = dma_fence_wait(f, false);
        if (r) {
                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
                goto fail;
        }

        /* re-enable EDC with double-error-detect and fault propagation */
        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
        WREG32(mmGB_EDC_MODE, tmp);

        /* NOTE(review): clearing DIS_EDC then ORing in 1 looks odd - if
         * DIS_EDC is bit 0 this re-sets the bit just cleared; confirm
         * against the CC_GC_EDC_CONFIG register layout. */
        tmp = RREG32(mmCC_GC_EDC_CONFIG);
        tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
        WREG32(mmCC_GC_EDC_CONFIG, tmp);


        /* read back registers to clear the counters */
        for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
                RREG32(sec_ded_counter_registers[i]);

fail:
        /* common exit: free the IB and drop the fence; r is 0 on success */
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);

        return r;
}
1726
/*
 * gfx_v8_0_gpu_early_init() - populate adev->gfx.config for this ASIC.
 *
 * Fills in the shader-engine/pipe/CU/RB topology and FIFO sizes per
 * chip (per-revision CU counts for the Carrizo/Stoney APUs; from
 * atombios for Polaris), then derives gb_addr_config - including the
 * DRAM row size decoded from the memory-controller registers.
 *
 * Returns 0 on success, or the error from
 * amdgpu_atombios_get_gfx_info() on Polaris parts.
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
        u32 gb_addr_config;
        u32 mc_shared_chmap, mc_arb_ramcfg;
        u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
        u32 tmp;
        int ret;

        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                adev->gfx.config.max_shader_engines = 1;
                adev->gfx.config.max_tile_pipes = 2;
                adev->gfx.config.max_cu_per_sh = 6;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_texture_channel_caches = 2;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_FIJI:
                adev->gfx.config.max_shader_engines = 4;
                adev->gfx.config.max_tile_pipes = 16;
                adev->gfx.config.max_cu_per_sh = 16;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 4;
                adev->gfx.config.max_texture_channel_caches = 16;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_POLARIS11:
                /* Polaris: topology comes from the vbios tables */
                ret = amdgpu_atombios_get_gfx_info(adev);
                if (ret)
                        return ret;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_POLARIS10:
                ret = amdgpu_atombios_get_gfx_info(adev);
                if (ret)
                        return ret;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_TONGA:
                adev->gfx.config.max_shader_engines = 4;
                adev->gfx.config.max_tile_pipes = 8;
                adev->gfx.config.max_cu_per_sh = 8;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_texture_channel_caches = 8;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_CARRIZO:
                adev->gfx.config.max_shader_engines = 1;
                adev->gfx.config.max_tile_pipes = 2;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;

                /* CU count depends on the Carrizo SKU (PCI revision) */
                switch (adev->pdev->revision) {
                case 0xc4:
                case 0x84:
                case 0xc8:
                case 0xcc:
                case 0xe1:
                case 0xe3:
                        /* B10 */
                        adev->gfx.config.max_cu_per_sh = 8;
                        break;
                case 0xc5:
                case 0x81:
                case 0x85:
                case 0xc9:
                case 0xcd:
                case 0xe2:
                case 0xe4:
                        /* B8 */
                        adev->gfx.config.max_cu_per_sh = 6;
                        break;
                case 0xc6:
                case 0xca:
                case 0xce:
                case 0x88:
                        /* B6 */
                        adev->gfx.config.max_cu_per_sh = 6;
                        break;
                case 0xc7:
                case 0x87:
                case 0xcb:
                case 0xe5:
                case 0x89:
                default:
                        /* B4 */
                        adev->gfx.config.max_cu_per_sh = 4;
                        break;
                }

                adev->gfx.config.max_texture_channel_caches = 2;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_STONEY:
                adev->gfx.config.max_shader_engines = 1;
                adev->gfx.config.max_tile_pipes = 2;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 1;

                /* CU count depends on the Stoney SKU (PCI revision) */
                switch (adev->pdev->revision) {
                case 0xc0:
                case 0xc1:
                case 0xc2:
                case 0xc4:
                case 0xc8:
                case 0xc9:
                        adev->gfx.config.max_cu_per_sh = 3;
                        break;
                case 0xd0:
                case 0xd1:
                case 0xd2:
                default:
                        adev->gfx.config.max_cu_per_sh = 2;
                        break;
                }

                adev->gfx.config.max_texture_channel_caches = 2;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 16;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
                break;
        default:
                /* unknown VI part: conservative Tonga-like defaults */
                adev->gfx.config.max_shader_engines = 2;
                adev->gfx.config.max_tile_pipes = 4;
                adev->gfx.config.max_cu_per_sh = 2;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_texture_channel_caches = 4;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        }

        /* NOTE(review): mc_shared_chmap is read but not used below -
         * possibly leftover; confirm before removing the read. */
        mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
        adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
        mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

        adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
        adev->gfx.config.mem_max_burst_length_bytes = 256;
        if (adev->flags & AMD_IS_APU) {
                /* Get memory bank mapping mode. */
                tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
                dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
                dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

                tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
                dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
                dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

                /* Validate settings in case only one DIMM installed. */
                if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
                        dimm00_addr_map = 0;
                if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
                        dimm01_addr_map = 0;
                if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
                        dimm10_addr_map = 0;
                if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
                        dimm11_addr_map = 0;

                /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
                /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
                if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
                        adev->gfx.config.mem_row_size_in_kb = 2;
                else
                        adev->gfx.config.mem_row_size_in_kb = 1;
        } else {
                /* dGPU: derive row size from the column count, capped at 4KB */
                tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
                adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
                if (adev->gfx.config.mem_row_size_in_kb > 4)
                        adev->gfx.config.mem_row_size_in_kb = 4;
        }

        adev->gfx.config.shader_engine_tile_size = 32;
        adev->gfx.config.num_gpus = 1;
        adev->gfx.config.multi_gpu_tile_size = 64;

        /* fix up row size */
        switch (adev->gfx.config.mem_row_size_in_kb) {
        case 1:
        default:
                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
                break;
        case 2:
                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
                break;
        case 4:
                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
                break;
        }
        adev->gfx.config.gb_addr_config = gb_addr_config;

        return 0;
}
1983
/**
 * gfx_v8_0_sw_init - software-side initialization of the GFX v8 IP block
 * @handle: opaque IP-block handle, actually a struct amdgpu_device pointer
 *
 * Registers the CP interrupt sources, loads the gfx microcode, allocates
 * the RLC and MEC buffer objects, initializes the gfx and compute rings,
 * and reserves the GDS, GWS and OA partitions used by the gfx engine.
 *
 * Returns 0 on success or a negative error code on the first failure;
 * on error, resources allocated by earlier steps are not unwound here.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event (interrupt source id 181: CP end-of-pipe) */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg (interrupt source id 184: privileged register fault) */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst (interrupt source id 185: privileged instruction fault) */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	/* Firmware must be available before the RLC/MEC objects are set up. */
	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		/* ring i maps to pipe i/8, queue i%8 (8 queues per pipe) */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		/* each pipe has its own EOP interrupt line */
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     irq_type);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	/* constant-engine RAM size (bytes) used for CE dumps/preambles */
	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2096
2097 static int gfx_v8_0_sw_fini(void *handle)
2098 {
2099         int i;
2100         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2101
2102         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2103         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2104         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2105
2106         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2107                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2108         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2109                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2110
2111         gfx_v8_0_mec_fini(adev);
2112         gfx_v8_0_rlc_fini(adev);
2113         gfx_v8_0_free_microcode(adev);
2114
2115         return 0;
2116 }
2117
2118 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2119 {
2120         uint32_t *modearray, *mod2array;
2121         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2122         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2123         u32 reg_offset;
2124
2125         modearray = adev->gfx.config.tile_mode_array;
2126         mod2array = adev->gfx.config.macrotile_mode_array;
2127
2128         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2129                 modearray[reg_offset] = 0;
2130
2131         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2132                 mod2array[reg_offset] = 0;
2133
2134         switch (adev->asic_type) {
2135         case CHIP_TOPAZ:
2136                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2137                                 PIPE_CONFIG(ADDR_SURF_P2) |
2138                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2139                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2140                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2141                                 PIPE_CONFIG(ADDR_SURF_P2) |
2142                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2143                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2144                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2145                                 PIPE_CONFIG(ADDR_SURF_P2) |
2146                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2147                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2148                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2149                                 PIPE_CONFIG(ADDR_SURF_P2) |
2150                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2151                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2152                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153                                 PIPE_CONFIG(ADDR_SURF_P2) |
2154                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2155                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2156                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2157                                 PIPE_CONFIG(ADDR_SURF_P2) |
2158                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2159                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2160                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2161                                 PIPE_CONFIG(ADDR_SURF_P2) |
2162                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2163                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2164                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2165                                 PIPE_CONFIG(ADDR_SURF_P2));
2166                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2167                                 PIPE_CONFIG(ADDR_SURF_P2) |
2168                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2169                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2170                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2171                                  PIPE_CONFIG(ADDR_SURF_P2) |
2172                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2173                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2174                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2175                                  PIPE_CONFIG(ADDR_SURF_P2) |
2176                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2177                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2178                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2179                                  PIPE_CONFIG(ADDR_SURF_P2) |
2180                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2181                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2182                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2183                                  PIPE_CONFIG(ADDR_SURF_P2) |
2184                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2185                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2187                                  PIPE_CONFIG(ADDR_SURF_P2) |
2188                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2189                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2191                                  PIPE_CONFIG(ADDR_SURF_P2) |
2192                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2193                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2194                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2195                                  PIPE_CONFIG(ADDR_SURF_P2) |
2196                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2197                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2198                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2199                                  PIPE_CONFIG(ADDR_SURF_P2) |
2200                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2201                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2202                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2203                                  PIPE_CONFIG(ADDR_SURF_P2) |
2204                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2205                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2206                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2207                                  PIPE_CONFIG(ADDR_SURF_P2) |
2208                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2209                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2210                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2211                                  PIPE_CONFIG(ADDR_SURF_P2) |
2212                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2213                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2214                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2215                                  PIPE_CONFIG(ADDR_SURF_P2) |
2216                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2217                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2218                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2219                                  PIPE_CONFIG(ADDR_SURF_P2) |
2220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2222                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2223                                  PIPE_CONFIG(ADDR_SURF_P2) |
2224                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2225                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2226                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2227                                  PIPE_CONFIG(ADDR_SURF_P2) |
2228                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2229                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2230                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231                                  PIPE_CONFIG(ADDR_SURF_P2) |
2232                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2233                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2234                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2235                                  PIPE_CONFIG(ADDR_SURF_P2) |
2236                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2237                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2238
2239                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2240                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2241                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2242                                 NUM_BANKS(ADDR_SURF_8_BANK));
2243                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2244                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2245                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2246                                 NUM_BANKS(ADDR_SURF_8_BANK));
2247                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2248                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2249                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2250                                 NUM_BANKS(ADDR_SURF_8_BANK));
2251                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2252                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2253                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2254                                 NUM_BANKS(ADDR_SURF_8_BANK));
2255                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2257                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258                                 NUM_BANKS(ADDR_SURF_8_BANK));
2259                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2260                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2261                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2262                                 NUM_BANKS(ADDR_SURF_8_BANK));
2263                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2264                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2265                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2266                                 NUM_BANKS(ADDR_SURF_8_BANK));
2267                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2268                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2269                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2270                                 NUM_BANKS(ADDR_SURF_16_BANK));
2271                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2272                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2273                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2274                                 NUM_BANKS(ADDR_SURF_16_BANK));
2275                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2276                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2277                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2278                                  NUM_BANKS(ADDR_SURF_16_BANK));
2279                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2280                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2281                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2282                                  NUM_BANKS(ADDR_SURF_16_BANK));
2283                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2284                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2285                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2286                                  NUM_BANKS(ADDR_SURF_16_BANK));
2287                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2288                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2289                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290                                  NUM_BANKS(ADDR_SURF_16_BANK));
2291                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2293                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2294                                  NUM_BANKS(ADDR_SURF_8_BANK));
2295
2296                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2297                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2298                             reg_offset != 23)
2299                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2300
2301                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2302                         if (reg_offset != 7)
2303                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2304
2305                 break;
2306         case CHIP_FIJI:
2307                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2308                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2309                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2310                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2311                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2312                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2313                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2314                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2315                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2318                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2319                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2322                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2323                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2326                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2328                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2330                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2332                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2334                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2336                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2337                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2338                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2340                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2341                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2342                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2344                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2345                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2346                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2348                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2349                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2350                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2352                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2353                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2354                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2355                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2356                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2357                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2358                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2361                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2365                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2366                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2369                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2370                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2373                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2374                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2377                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2378                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2381                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2382                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2384                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2385                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2386                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2388                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2389                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2390                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2392                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2393                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2394                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2396                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2398                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2399                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2402                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2404                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2406                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2410                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2412                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2416                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2420                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2424                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2425                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2427                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2428                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2429
2430                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2431                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2432                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2433                                 NUM_BANKS(ADDR_SURF_8_BANK));
2434                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2436                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2437                                 NUM_BANKS(ADDR_SURF_8_BANK));
2438                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2441                                 NUM_BANKS(ADDR_SURF_8_BANK));
2442                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2444                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445                                 NUM_BANKS(ADDR_SURF_8_BANK));
2446                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2448                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2449                                 NUM_BANKS(ADDR_SURF_8_BANK));
2450                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2452                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2453                                 NUM_BANKS(ADDR_SURF_8_BANK));
2454                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2456                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2457                                 NUM_BANKS(ADDR_SURF_8_BANK));
2458                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2460                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2461                                 NUM_BANKS(ADDR_SURF_8_BANK));
2462                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2464                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2465                                 NUM_BANKS(ADDR_SURF_8_BANK));
2466                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2468                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469                                  NUM_BANKS(ADDR_SURF_8_BANK));
2470                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2472                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2473                                  NUM_BANKS(ADDR_SURF_8_BANK));
2474                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2476                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2477                                  NUM_BANKS(ADDR_SURF_8_BANK));
2478                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2481                                  NUM_BANKS(ADDR_SURF_8_BANK));
2482                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2484                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2485                                  NUM_BANKS(ADDR_SURF_4_BANK));
2486
2487                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2488                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2489
2490                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2491                         if (reg_offset != 7)
2492                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2493
2494                 break;
2495         case CHIP_TONGA:
                 /*
                  * Tonga GB_TILE_MODE table.  Most entries use the 8-pipe
                  * ADDR_SURF_P8_32x32_16x16 pipe config; the PRT entries at
                  * indices 7, 12, 17, 23 and 30 use ADDR_SURF_P4_16x16
                  * instead.  Entries 0-7 are depth micro-tiling with
                  * increasing tile splits, 8 is linear-aligned, 9-12 display,
                  * 13-26 thin/thick color, 27-30 rotated micro-tiling.
                  */
2496                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2497                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2498                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2499                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2500                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2501                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2502                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2503                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2504                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2506                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2507                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2508                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2510                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2511                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2512                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2515                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2516                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2517                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2519                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2520                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2521                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2523                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2524                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2525                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2526                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2527                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2528                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2529                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2530                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2531                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2533                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2534                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2536                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2537                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2538                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2539                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2541                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2542                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2544                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2546                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2549                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2550                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2553                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2554                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2555                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2557                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2558                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2559                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2562                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2563                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2564                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2566                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2567                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2570                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2571                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2573                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2574                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2575                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2577                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2578                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2579                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2581                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2582                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2583                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2585                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2586                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2587                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2588                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2589                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2590                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2591                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2593                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2594                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2595                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2597                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2598                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2599                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2601                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2602                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2603                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2605                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2607                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2609                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2610                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2611                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2613                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2614                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2615                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2616                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2617                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2618
                 /*
                  * Tonga GB_MACROTILE_MODE table (bank width/height, macro
                  * tile aspect ratio, bank count).  Index 7 is intentionally
                  * never assigned -- the write loop below skips reg_offset 7,
                  * so that register keeps its reset value (presumably
                  * reserved on this ASIC; confirm against the register spec).
                  */
2619                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2621                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2622                                 NUM_BANKS(ADDR_SURF_16_BANK));
2623                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2625                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2626                                 NUM_BANKS(ADDR_SURF_16_BANK));
2627                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2629                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2630                                 NUM_BANKS(ADDR_SURF_16_BANK));
2631                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2633                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2634                                 NUM_BANKS(ADDR_SURF_16_BANK));
2635                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2637                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2638                                 NUM_BANKS(ADDR_SURF_16_BANK));
2639                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2642                                 NUM_BANKS(ADDR_SURF_16_BANK));
2643                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2646                                 NUM_BANKS(ADDR_SURF_16_BANK));
2647                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2649                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2650                                 NUM_BANKS(ADDR_SURF_16_BANK));
2651                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2653                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2654                                 NUM_BANKS(ADDR_SURF_16_BANK));
2655                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2657                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2658                                  NUM_BANKS(ADDR_SURF_16_BANK));
2659                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2662                                  NUM_BANKS(ADDR_SURF_16_BANK));
2663                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2665                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2666                                  NUM_BANKS(ADDR_SURF_8_BANK));
2667                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2669                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2670                                  NUM_BANKS(ADDR_SURF_4_BANK));
2671                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2673                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2674                                  NUM_BANKS(ADDR_SURF_4_BANK));
2675
                 /* Program GB_TILE_MODE0..N from the table built above. */
2676                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2677                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2678
                 /* Program GB_MACROTILE_MODE0..N; register 7 is skipped. */
2679                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2680                         if (reg_offset != 7)
2681                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2682
2683                 break;
2684         case CHIP_POLARIS11:
                 /*
                  * Polaris11 GB_TILE_MODE table.  Unlike Tonga/Polaris10,
                  * every entry here uses the 4-pipe ADDR_SURF_P4_16x16 pipe
                  * config.  Layout mirrors the other ASICs: 0-7 depth
                  * micro-tiling with increasing tile splits, 8 linear-aligned,
                  * 9-12 display, 13-26 thin/thick color, 27-30 rotated.
                  */
2685                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2688                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2692                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2696                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2697                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2700                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2701                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2704                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2705                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2708                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2709                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2710                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2712                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2713                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2714                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2716                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2717                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2718                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2719                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2722                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2726                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2730                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2731                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2732                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2734                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2735                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2736                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2739                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2744                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2746                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2747                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2748                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2750                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2751                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2754                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2755                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2756                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2760                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2764                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2766                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2768                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2770                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2771                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2772                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2774                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2776                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2778                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2779                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2780                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2782                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2783                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2784                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2786                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2787                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2788                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2790                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2791                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2792                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2794                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2798                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2799                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2800                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2802                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2803                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2806                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2807
                 /*
                  * Polaris11 GB_MACROTILE_MODE table.  As on the other ASICs,
                  * index 7 is intentionally never assigned and the write loop
                  * below skips reg_offset 7, leaving that register at its
                  * reset value.
                  */
2808                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2810                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811                                 NUM_BANKS(ADDR_SURF_16_BANK));
2812
2813                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2815                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2816                                 NUM_BANKS(ADDR_SURF_16_BANK));
2817
2818                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2819                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2820                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2821                                 NUM_BANKS(ADDR_SURF_16_BANK));
2822
2823                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2824                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2825                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2826                                 NUM_BANKS(ADDR_SURF_16_BANK));
2827
2828                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2831                                 NUM_BANKS(ADDR_SURF_16_BANK));
2832
2833                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836                                 NUM_BANKS(ADDR_SURF_16_BANK));
2837
2838                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2840                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2841                                 NUM_BANKS(ADDR_SURF_16_BANK));
2842
2843                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2844                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2845                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846                                 NUM_BANKS(ADDR_SURF_16_BANK));
2847
2848                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2849                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2850                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2851                                 NUM_BANKS(ADDR_SURF_16_BANK));
2852
2853                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856                                 NUM_BANKS(ADDR_SURF_16_BANK));
2857
2858                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2860                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2861                                 NUM_BANKS(ADDR_SURF_16_BANK));
2862
2863                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2864                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2865                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2866                                 NUM_BANKS(ADDR_SURF_16_BANK));
2867
2868                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2870                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2871                                 NUM_BANKS(ADDR_SURF_8_BANK));
2872
2873                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2874                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2875                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2876                                 NUM_BANKS(ADDR_SURF_4_BANK));
2877
                 /* Program GB_TILE_MODE0..N from the table built above. */
2878                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2879                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2880
                 /* Program GB_MACROTILE_MODE0..N; register 7 is skipped. */
2881                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2882                         if (reg_offset != 7)
2883                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2884
2885                 break;
2886         case CHIP_POLARIS10:
2887                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2890                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2900                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2906                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2907                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2908                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2912                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2916                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2917                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2920                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2921                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2924                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2926                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2928                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2932                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2933                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2935                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2936                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2937                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2938                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2940                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2941                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2944                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2946                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2948                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2952                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2956                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2957                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2958                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2962                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2966                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2968                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2970                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2974                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2976                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2978                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2979                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2980                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2982                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2984                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2986                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2990                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2992                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2994                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2996                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3000                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3001                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3004                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3005                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3006                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3007                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3009
3010                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3011                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3012                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3013                                 NUM_BANKS(ADDR_SURF_16_BANK));
3014
3015                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3017                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018                                 NUM_BANKS(ADDR_SURF_16_BANK));
3019
3020                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3021                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3022                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3023                                 NUM_BANKS(ADDR_SURF_16_BANK));
3024
3025                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3027                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3028                                 NUM_BANKS(ADDR_SURF_16_BANK));
3029
3030                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3032                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3033                                 NUM_BANKS(ADDR_SURF_16_BANK));
3034
3035                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3037                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3038                                 NUM_BANKS(ADDR_SURF_16_BANK));
3039
3040                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3042                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3043                                 NUM_BANKS(ADDR_SURF_16_BANK));
3044
3045                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3047                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3048                                 NUM_BANKS(ADDR_SURF_16_BANK));
3049
3050                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3052                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3053                                 NUM_BANKS(ADDR_SURF_16_BANK));
3054
3055                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3057                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3058                                 NUM_BANKS(ADDR_SURF_16_BANK));
3059
3060                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3063                                 NUM_BANKS(ADDR_SURF_16_BANK));
3064
3065                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3067                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3068                                 NUM_BANKS(ADDR_SURF_8_BANK));
3069
3070                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3071                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3072                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3073                                 NUM_BANKS(ADDR_SURF_4_BANK));
3074
3075                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3077                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3078                                 NUM_BANKS(ADDR_SURF_4_BANK));
3079
3080                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3081                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3082
3083                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3084                         if (reg_offset != 7)
3085                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3086
3087                 break;
3088         case CHIP_STONEY:
3089                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090                                 PIPE_CONFIG(ADDR_SURF_P2) |
3091                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3092                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3094                                 PIPE_CONFIG(ADDR_SURF_P2) |
3095                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3096                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3097                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3098                                 PIPE_CONFIG(ADDR_SURF_P2) |
3099                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3100                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3102                                 PIPE_CONFIG(ADDR_SURF_P2) |
3103                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3104                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3105                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106                                 PIPE_CONFIG(ADDR_SURF_P2) |
3107                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3108                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3109                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3110                                 PIPE_CONFIG(ADDR_SURF_P2) |
3111                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3112                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3114                                 PIPE_CONFIG(ADDR_SURF_P2) |
3115                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3116                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3118                                 PIPE_CONFIG(ADDR_SURF_P2));
3119                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3120                                 PIPE_CONFIG(ADDR_SURF_P2) |
3121                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3122                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3123                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3124                                  PIPE_CONFIG(ADDR_SURF_P2) |
3125                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3126                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3127                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128                                  PIPE_CONFIG(ADDR_SURF_P2) |
3129                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3130                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3131                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3132                                  PIPE_CONFIG(ADDR_SURF_P2) |
3133                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3134                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3135                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3136                                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3138                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3139                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3140                                  PIPE_CONFIG(ADDR_SURF_P2) |
3141                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3142                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3144                                  PIPE_CONFIG(ADDR_SURF_P2) |
3145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3147                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3148                                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3152                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3156                                  PIPE_CONFIG(ADDR_SURF_P2) |
3157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3160                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3164                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3167                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3168                                  PIPE_CONFIG(ADDR_SURF_P2) |
3169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3172                                  PIPE_CONFIG(ADDR_SURF_P2) |
3173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3176                                  PIPE_CONFIG(ADDR_SURF_P2) |
3177                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3178                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3180                                  PIPE_CONFIG(ADDR_SURF_P2) |
3181                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3182                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3183                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3184                                  PIPE_CONFIG(ADDR_SURF_P2) |
3185                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3186                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3187                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3188                                  PIPE_CONFIG(ADDR_SURF_P2) |
3189                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3190                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3191
3192                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3194                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3195                                 NUM_BANKS(ADDR_SURF_8_BANK));
3196                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3198                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3199                                 NUM_BANKS(ADDR_SURF_8_BANK));
3200                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3202                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3203                                 NUM_BANKS(ADDR_SURF_8_BANK));
3204                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3205                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3206                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3207                                 NUM_BANKS(ADDR_SURF_8_BANK));
3208                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3210                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3211                                 NUM_BANKS(ADDR_SURF_8_BANK));
3212                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3214                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3215                                 NUM_BANKS(ADDR_SURF_8_BANK));
3216                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3218                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3219                                 NUM_BANKS(ADDR_SURF_8_BANK));
3220                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3221                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3222                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3223                                 NUM_BANKS(ADDR_SURF_16_BANK));
3224                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3225                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3226                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3227                                 NUM_BANKS(ADDR_SURF_16_BANK));
3228                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3229                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3230                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3231                                  NUM_BANKS(ADDR_SURF_16_BANK));
3232                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3233                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3234                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235                                  NUM_BANKS(ADDR_SURF_16_BANK));
3236                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3237                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3238                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3239                                  NUM_BANKS(ADDR_SURF_16_BANK));
3240                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3241                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3242                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243                                  NUM_BANKS(ADDR_SURF_16_BANK));
3244                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3246                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3247                                  NUM_BANKS(ADDR_SURF_8_BANK));
3248
3249                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3250                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3251                             reg_offset != 23)
3252                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3253
3254                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3255                         if (reg_offset != 7)
3256                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3257
3258                 break;
3259         default:
3260                 dev_warn(adev->dev,
3261                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3262                          adev->asic_type);
3263
3264         case CHIP_CARRIZO:
3265                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3266                                 PIPE_CONFIG(ADDR_SURF_P2) |
3267                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3268                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3269                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3270                                 PIPE_CONFIG(ADDR_SURF_P2) |
3271                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3272                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3273                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3274                                 PIPE_CONFIG(ADDR_SURF_P2) |
3275                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3276                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3277                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3278                                 PIPE_CONFIG(ADDR_SURF_P2) |
3279                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3280                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3281                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3282                                 PIPE_CONFIG(ADDR_SURF_P2) |
3283                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3284                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3285                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3286                                 PIPE_CONFIG(ADDR_SURF_P2) |
3287                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3288                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3289                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3290                                 PIPE_CONFIG(ADDR_SURF_P2) |
3291                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3292                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3293                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3294                                 PIPE_CONFIG(ADDR_SURF_P2));
3295                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3296                                 PIPE_CONFIG(ADDR_SURF_P2) |
3297                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3298                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3299                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3300                                  PIPE_CONFIG(ADDR_SURF_P2) |
3301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3303                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3304                                  PIPE_CONFIG(ADDR_SURF_P2) |
3305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3307                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3308                                  PIPE_CONFIG(ADDR_SURF_P2) |
3309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3311                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3312                                  PIPE_CONFIG(ADDR_SURF_P2) |
3313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3315                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3316                                  PIPE_CONFIG(ADDR_SURF_P2) |
3317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3319                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3320                                  PIPE_CONFIG(ADDR_SURF_P2) |
3321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3323                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3324                                  PIPE_CONFIG(ADDR_SURF_P2) |
3325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3327                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3328                                  PIPE_CONFIG(ADDR_SURF_P2) |
3329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3331                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3332                                  PIPE_CONFIG(ADDR_SURF_P2) |
3333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3335                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3336                                  PIPE_CONFIG(ADDR_SURF_P2) |
3337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3339                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3340                                  PIPE_CONFIG(ADDR_SURF_P2) |
3341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3343                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3344                                  PIPE_CONFIG(ADDR_SURF_P2) |
3345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3347                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3348                                  PIPE_CONFIG(ADDR_SURF_P2) |
3349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3351                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3352                                  PIPE_CONFIG(ADDR_SURF_P2) |
3353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3355                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3356                                  PIPE_CONFIG(ADDR_SURF_P2) |
3357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3359                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3360                                  PIPE_CONFIG(ADDR_SURF_P2) |
3361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3363                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3364                                  PIPE_CONFIG(ADDR_SURF_P2) |
3365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3367
3368                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3370                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3371                                 NUM_BANKS(ADDR_SURF_8_BANK));
3372                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3373                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3374                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3375                                 NUM_BANKS(ADDR_SURF_8_BANK));
3376                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3379                                 NUM_BANKS(ADDR_SURF_8_BANK));
3380                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3381                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3382                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3383                                 NUM_BANKS(ADDR_SURF_8_BANK));
3384                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3385                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3386                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3387                                 NUM_BANKS(ADDR_SURF_8_BANK));
3388                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3390                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3391                                 NUM_BANKS(ADDR_SURF_8_BANK));
3392                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3394                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3395                                 NUM_BANKS(ADDR_SURF_8_BANK));
3396                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3397                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3398                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3399                                 NUM_BANKS(ADDR_SURF_16_BANK));
3400                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3401                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3402                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3403                                 NUM_BANKS(ADDR_SURF_16_BANK));
3404                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3405                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3406                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3407                                  NUM_BANKS(ADDR_SURF_16_BANK));
3408                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3409                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3410                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3411                                  NUM_BANKS(ADDR_SURF_16_BANK));
3412                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3413                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3414                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3415                                  NUM_BANKS(ADDR_SURF_16_BANK));
3416                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3417                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3418                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3419                                  NUM_BANKS(ADDR_SURF_16_BANK));
3420                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3421                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3422                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3423                                  NUM_BANKS(ADDR_SURF_8_BANK));
3424
3425                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3426                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3427                             reg_offset != 23)
3428                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3429
3430                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3431                         if (reg_offset != 7)
3432                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3433
3434                 break;
3435         }
3436 }
3437
3438 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3439                                   u32 se_num, u32 sh_num, u32 instance)
3440 {
3441         u32 data;
3442
3443         if (instance == 0xffffffff)
3444                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3445         else
3446                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3447
3448         if (se_num == 0xffffffff)
3449                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3450         else
3451                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3452
3453         if (sh_num == 0xffffffff)
3454                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3455         else
3456                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3457
3458         WREG32(mmGRBM_GFX_INDEX, data);
3459 }
3460
3461 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3462 {
3463         return (u32)((1ULL << bit_width) - 1);
3464 }
3465
3466 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3467 {
3468         u32 data, mask;
3469
3470         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3471                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3472
3473         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3474
3475         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3476                                        adev->gfx.config.max_sh_per_se);
3477
3478         return (~data) & mask;
3479 }
3480
3481 static void
3482 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3483 {
3484         switch (adev->asic_type) {
3485         case CHIP_FIJI:
3486                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3487                           RB_XSEL2(1) | PKR_MAP(2) |
3488                           PKR_XSEL(1) | PKR_YSEL(1) |
3489                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3490                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3491                            SE_PAIR_YSEL(2);
3492                 break;
3493         case CHIP_TONGA:
3494         case CHIP_POLARIS10:
3495                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3496                           SE_XSEL(1) | SE_YSEL(1);
3497                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3498                            SE_PAIR_YSEL(2);
3499                 break;
3500         case CHIP_TOPAZ:
3501         case CHIP_CARRIZO:
3502                 *rconf |= RB_MAP_PKR0(2);
3503                 *rconf1 |= 0x0;
3504                 break;
3505         case CHIP_POLARIS11:
3506                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3507                           SE_XSEL(1) | SE_YSEL(1);
3508                 *rconf1 |= 0x0;
3509                 break;
3510         case CHIP_STONEY:
3511                 *rconf |= 0x0;
3512                 *rconf1 |= 0x0;
3513                 break;
3514         default:
3515                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3516                 break;
3517         }
3518 }
3519
/*
 * gfx_v8_0_write_harvested_raster_configs - program per-SE raster configs
 * when some render backends (RBs) are harvested.
 *
 * @adev: amdgpu_device pointer
 * @raster_config: base PA_SC_RASTER_CONFIG value
 * @raster_config_1: base PA_SC_RASTER_CONFIG_1 value
 * @rb_mask: bitmap of active RBs across all shader engines
 * @num_rb: total number of RB pipes to account for
 *
 * For each shader engine, the base raster config is patched so the
 * SE/PKR/RB mapping fields steer away from harvested backends, then
 * written through a per-SE GRBM_GFX_INDEX selection.  Broadcast mode is
 * restored before returning.  Caller must hold grbm_idx_mutex.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice rb_mask into one contiguous chunk of rb_per_se bits per SE. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* A whole SE pair is harvested: repoint SE_PAIR_MAP at the pair
	 * that still has active RBs. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* One SE of this pair is fully harvested: repoint SE_MAP. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		/* One packer of this SE has no active RBs: repoint PKR_MAP. */
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* RB pair inside packer 0 of this SE. */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				/* RB pair inside packer 1 of this SE. */
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3628
/*
 * gfx_v8_0_setup_rb - configure the active render backends (RBs)
 *
 * @adev: amdgpu_device pointer
 *
 * Walks every SE/SH, reads which RBs survived harvesting, caches the
 * resulting bitmap and count in adev->gfx.config, and programs
 * PA_SC_RASTER_CONFIG(_1) — broadcast if the full RB complement is
 * present, otherwise per-SE through the harvested path.  The per-SE
 * register values are then cached for userspace queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Collect the active-RB bitmap from every SE/SH. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* Nothing harvested (or nothing enabled at all): broadcast the
	 * default config.  Otherwise program each SE individually. */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3685
/**
 * gfx_v8_0_init_compute_vmid - initialize compute VMID SH_MEM registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
/* SH_MEM aperture base value used for all compute VMIDs (see map below). */
#define DEFAULT_SH_MEM_BASES	(0x6000)
/* Compute VMIDs: FIRST_COMPUTE_VMID .. LAST_COMPUTE_VMID - 1 (8..15). */
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, cache-coherent
	 * default MTYPE, private ATC enabled. */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* back to VMID 0 */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3730
/*
 * gfx_v8_0_gpu_init - one-time GFX block initialization
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the GRBM read timeout and the address-config mirrors, builds
 * the tiling tables, render backend setup and CU info, initializes the
 * SH_MEM registers for all 16 VMIDs (VMID 0 gets uncached MTYPE, the
 * rest non-coherent), configures the compute VMIDs and finally the
 * PA_SC fifo sizes under broadcast GRBM_GFX_INDEX.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: uncached default and APE1 MTYPE */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* remaining VMIDs: non-coherent MTYPE */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
3793
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes units to idle
 *
 * @adev: amdgpu_device pointer
 *
 * Polls RLC_SERDES_CU_MASTER_BUSY per SE/SH (under grbm_idx_mutex),
 * then the non-CU master busy bits, each for up to adev->usec_timeout
 * microseconds.  A timeout is not reported to the caller.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast before dropping the mutex */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3823
3824 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3825                                                bool enable)
3826 {
3827         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3828
3829         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3830         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3831         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3832         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3833
3834         WREG32(mmCP_INT_CNTL_RING0, tmp);
3835 }
3836
/*
 * gfx_v8_0_init_csb - point the RLC at the clear-state indirect buffer
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the CSIB GPU address (split hi/lo, low bits masked to dword
 * alignment) and its size into the RLC.
 */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
3847
3848 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3849                                 int ind_offset,
3850                                 int list_size,
3851                                 int *unique_indices,
3852                                 int *indices_count,
3853                                 int max_indices,
3854                                 int *ind_start_offsets,
3855                                 int *offset_count,
3856                                 int max_offset)
3857 {
3858         int indices;
3859         bool new_entry = true;
3860
3861         for (; ind_offset < list_size; ind_offset++) {
3862
3863                 if (new_entry) {
3864                         new_entry = false;
3865                         ind_start_offsets[*offset_count] = ind_offset;
3866                         *offset_count = *offset_count + 1;
3867                         BUG_ON(*offset_count >= max_offset);
3868                 }
3869
3870                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3871                         new_entry = true;
3872                         continue;
3873                 }
3874
3875                 ind_offset += 2;
3876
3877                 /* look for the matching indice */
3878                 for (indices = 0;
3879                         indices < *indices_count;
3880                         indices++) {
3881                         if (unique_indices[indices] ==
3882                                 register_list_format[ind_offset])
3883                                 break;
3884                 }
3885
3886                 if (indices >= *indices_count) {
3887                         unique_indices[*indices_count] =
3888                                 register_list_format[ind_offset];
3889                         indices = *indices_count;
3890                         *indices_count = *indices_count + 1;
3891                         BUG_ON(*indices_count >= max_indices);
3892                 }
3893
3894                 register_list_format[ind_offset] = indices;
3895         }
3896 }
3897
/*
 * gfx_v8_0_init_save_restore_list - upload the RLC save/restore lists
 *
 * @adev: amdgpu_device pointer
 *
 * Copies the firmware-provided register list format, parses it into
 * unique index registers plus per-entry start offsets, then writes the
 * direct restore list to SRM ARAM and the indirect list, list size and
 * starting offsets to the RLC GPM scratch area.
 *
 * Return: 0 on success, -ENOMEM if the temporary copy cannot be
 * allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: parsing rewrites list entries in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* half the dword count — presumably register/value pairs */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		if (unique_indices[i] != 0) {
			/* NOTE(review): address takes bits [17:0], data bits
			 * [31:20]; bits 18-19 are dropped — confirm against
			 * the RLC SRM index register layout. */
			amdgpu_mm_wreg(adev, temp + i,
					unique_indices[i] & 0x3FFFF, false);
			amdgpu_mm_wreg(adev, data + i,
					unique_indices[i] >> 20, false);
		}
	}
	kfree(register_list_format);

	return 0;
}
3963
/* Turn on the RLC save/restore machine (SRM). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3968
/*
 * gfx_v8_0_init_power_gating - program RLC power-gating timing
 *
 * @adev: amdgpu_device pointer
 *
 * Sets the CP RB WPTR idle-poll count, the RLC power-up/power-down/
 * command-propagate/memory-sleep delays, the serdes command delay and
 * the GRBM register-save idle threshold for auto power gating.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
3985
/* Toggle the SMU clock-slowdown-on-power-up handshake bit. */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
3991
/* Toggle the SMU clock-slowdown-on-power-down handshake bit. */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
3997
/* CP power gating is enabled by CLEARING the CP_PG_DISABLE bit. */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4002
4003 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4004 {
4005         if ((adev->asic_type == CHIP_CARRIZO) ||
4006             (adev->asic_type == CHIP_STONEY)) {
4007                 gfx_v8_0_init_csb(adev);
4008                 gfx_v8_0_init_save_restore_list(adev);
4009                 gfx_v8_0_enable_save_restore_machine(adev);
4010                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4011                 gfx_v8_0_init_power_gating(adev);
4012                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4013                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4014                         cz_enable_sck_slow_down_on_power_up(adev, true);
4015                         cz_enable_sck_slow_down_on_power_down(adev, true);
4016                 } else {
4017                         cz_enable_sck_slow_down_on_power_up(adev, false);
4018                         cz_enable_sck_slow_down_on_power_down(adev, false);
4019                 }
4020                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4021                         cz_enable_cp_power_gating(adev, true);
4022                 else
4023                         cz_enable_cp_power_gating(adev, false);
4024         } else if (adev->asic_type == CHIP_POLARIS11) {
4025                 gfx_v8_0_init_csb(adev);
4026                 gfx_v8_0_init_save_restore_list(adev);
4027                 gfx_v8_0_enable_save_restore_machine(adev);
4028                 gfx_v8_0_init_power_gating(adev);
4029         }
4030
4031 }
4032
/*
 * Halt the RLC microcontroller (clear RLC_ENABLE_F32), then mask GUI
 * idle interrupts and wait for the RLC serdes to quiesce before
 * returning.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4040
/*
 * Pulse the RLC soft-reset bit in GRBM_SOFT_RESET, allowing 50us of
 * settle time on each edge.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4049
/*
 * Start the RLC microcontroller (set RLC_ENABLE_F32).  On dGPUs the
 * GUI idle interrupt is re-enabled here; on APUs it is enabled later,
 * after the CP has been initialized.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4060
4061 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4062 {
4063         const struct rlc_firmware_header_v2_0 *hdr;
4064         const __le32 *fw_data;
4065         unsigned i, fw_size;
4066
4067         if (!adev->gfx.rlc_fw)
4068                 return -EINVAL;
4069
4070         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4071         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4072
4073         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4074                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4075         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4076
4077         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4078         for (i = 0; i < fw_size; i++)
4079                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4080         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4081
4082         return 0;
4083 }
4084
/*
 * Bring the RLC up from scratch: stop it, disable clock and power
 * gating while it is down, soft-reset it, reprogram the PG state, load
 * its microcode (either directly or by checking the SMU-driven load),
 * and finally start it again.
 *
 * Returns 0 on success, a negative error code if microcode loading or
 * the SMU load check fails.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10) {
		/* clear the low two enable bits of the 3D variant as well
		 * (presumably CGCG/CGLS, mirroring the register above —
		 * NOTE(review): confirm against the register spec) */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the ucode; just verify it finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4128
4129 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4130 {
4131         int i;
4132         u32 tmp = RREG32(mmCP_ME_CNTL);
4133
4134         if (enable) {
4135                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4136                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4137                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4138         } else {
4139                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4140                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4141                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4142                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4143                         adev->gfx.gfx_ring[i].ready = false;
4144         }
4145         WREG32(mmCP_ME_CNTL, tmp);
4146         udelay(50);
4147 }
4148
/*
 * Load the PFP, CE and ME microcode images into the gfx CP.
 *
 * The CP is halted first; each image is then streamed dword-by-dword
 * through its UCODE_ADDR/UCODE_DATA (RAM_WADDR/RAM_DATA for ME)
 * register pair, after which the firmware version is written to the
 * address register — the same convention used by the RLC and MEC
 * loaders in this file.
 *
 * Returns -EINVAL if any of the three images has not been fetched,
 * 0 on success.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* halt PFP/CE/ME before rewriting their ucode memories */
	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
4205
4206 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4207 {
4208         u32 count = 0;
4209         const struct cs_section_def *sect = NULL;
4210         const struct cs_extent_def *ext = NULL;
4211
4212         /* begin clear state */
4213         count += 2;
4214         /* context control state */
4215         count += 3;
4216
4217         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4218                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4219                         if (sect->id == SECT_CONTEXT)
4220                                 count += 2 + ext->reg_count;
4221                         else
4222                                 return 0;
4223                 }
4224         }
4225         /* pa_sc_raster_config/pa_sc_raster_config1 */
4226         count += 4;
4227         /* end clear state */
4228         count += 2;
4229         /* clear state */
4230         count += 2;
4231
4232         return count;
4233 }
4234
/*
 * Initialize the gfx CP and emit the clear-state ("golden register")
 * sequence on gfx ring 0: preamble begin, context control, all
 * SECT_CONTEXT extents from vi_cs_data, per-ASIC PA_SC_RASTER_CONFIG
 * values, preamble end, a CLEAR_STATE packet and the CE partition
 * bases.  The packet count must match gfx_v8_0_get_csb_size() + 4.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every context-register extent from the golden state table */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG/_CONFIG1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		/* every VI-family ASIC must be listed above */
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4326
/*
 * Program gfx ring buffer 0 (size, base, rptr/wptr writeback addresses
 * and, where the ASIC supports it, the doorbell) and then start the CP
 * via gfx_v8_0_cp_gfx_start() and run a ring test.
 *
 * Returns the ring test result (0 on success); on failure the ring is
 * marked not ready.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA is set transiently so the pointers can be reset,
	 * then the original CNTL value is restored below. */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4410
4411 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4412 {
4413         int i;
4414
4415         if (enable) {
4416                 WREG32(mmCP_MEC_CNTL, 0);
4417         } else {
4418                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4419                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4420                         adev->gfx.compute_ring[i].ready = false;
4421         }
4422         udelay(50);
4423 }
4424
/*
 * Load the MEC (compute) microcode.  Halts the MEC engines first,
 * streams the MEC1 image through CP_MEC_ME1_UCODE_ADDR/DATA, then
 * optionally loads a separate MEC2 image when one was fetched.
 *
 * Returns -EINVAL if the MEC1 firmware is missing, 0 on success.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4470
4471 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4472 {
4473         int i, r;
4474
4475         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4476                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4477
4478                 if (ring->mqd_obj) {
4479                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4480                         if (unlikely(r != 0))
4481                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4482
4483                         amdgpu_bo_unpin(ring->mqd_obj);
4484                         amdgpu_bo_unreserve(ring->mqd_obj);
4485
4486                         amdgpu_bo_unref(&ring->mqd_obj);
4487                         ring->mqd_obj = NULL;
4488                 }
4489         }
4490 }
4491
/*
 * Set up and activate every compute queue (HQD): allocate/pin/map a
 * per-ring MQD buffer, program the queue registers through the SRBM
 * bank selected by vi_srbm_select(), mirror the programmed values into
 * the MQD, activate the queue, then enable doorbells, un-halt the MEC
 * and ring-test each compute ring.
 *
 * All CP_HQD_* register accesses between vi_srbm_select(ring->me, ...)
 * and vi_srbm_select(0, 0, 0, 0) target the currently selected queue;
 * the srbm_mutex serializes that selection.
 *
 * Returns 0, or a negative error code if MQD BO creation/reservation/
 * pinning/mapping fails (earlier queues are torn down via
 * gfx_v8_0_cp_compute_fini() in that case).  Individual ring-test
 * failures only clear that ring's ready flag.
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the queues.  */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		/* 0xC0310800: MQD header magic — presumably a packet header
		 * identifying the MQD format; see vi_structs.h (TODO confirm) */
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all CUs in every shader engine for this queue */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* select this ring's me/pipe/queue bank for the HQD writes below */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* mirror the programmed EOP address into the MQD */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* busy-wait (up to usec_timeout) for the HQD to drain */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address wether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* ring-test every compute ring; a failure only disables that ring */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
4739
4740 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4741 {
4742         int r;
4743
4744         if (!(adev->flags & AMD_IS_APU))
4745                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4746
4747         if (!adev->pp_enabled) {
4748                 if (!adev->firmware.smu_load) {
4749                         /* legacy firmware loading */
4750                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
4751                         if (r)
4752                                 return r;
4753
4754                         r = gfx_v8_0_cp_compute_load_microcode(adev);
4755                         if (r)
4756                                 return r;
4757                 } else {
4758                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4759                                                         AMDGPU_UCODE_ID_CP_CE);
4760                         if (r)
4761                                 return -EINVAL;
4762
4763                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4764                                                         AMDGPU_UCODE_ID_CP_PFP);
4765                         if (r)
4766                                 return -EINVAL;
4767
4768                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4769                                                         AMDGPU_UCODE_ID_CP_ME);
4770                         if (r)
4771                                 return -EINVAL;
4772
4773                         if (adev->asic_type == CHIP_TOPAZ) {
4774                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4775                                 if (r)
4776                                         return r;
4777                         } else {
4778                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4779                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
4780                                 if (r)
4781                                         return -EINVAL;
4782                         }
4783                 }
4784         }
4785
4786         r = gfx_v8_0_cp_gfx_resume(adev);
4787         if (r)
4788                 return r;
4789
4790         r = gfx_v8_0_cp_compute_resume(adev);
4791         if (r)
4792                 return r;
4793
4794         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4795
4796         return 0;
4797 }
4798
/* Enable or disable both command processors (GFX then compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4804
/* IP-block hw_init hook: program golden registers, initialize the GPU
 * core, then bring up the RLC and the command processors.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4821
/* IP-block hw_fini hook: quiesce the GFX engine.
 * Drops the privileged-op interrupt references, then (bare metal only)
 * halts the CPs and RLC, tears down the compute MQD backing store and
 * ungates GFX power so the block is fully powered for teardown.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	/* under SR-IOV the host owns the engine; the guest must not touch it */
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
4841
/* IP-block suspend hook: identical to hw_fini for this block. */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = handle;

	return gfx_v8_0_hw_fini(adev);
}
4848
/* IP-block resume hook: identical to hw_init for this block. */
static int gfx_v8_0_resume(void *handle)
{
	struct amdgpu_device *adev = handle;

	return gfx_v8_0_hw_init(adev);
}
4855
4856 static bool gfx_v8_0_is_idle(void *handle)
4857 {
4858         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4859
4860         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4861                 return false;
4862         else
4863                 return true;
4864 }
4865
4866 static int gfx_v8_0_wait_for_idle(void *handle)
4867 {
4868         unsigned i;
4869         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4870
4871         for (i = 0; i < adev->usec_timeout; i++) {
4872                 if (gfx_v8_0_is_idle(handle))
4873                         return 0;
4874
4875                 udelay(1);
4876         }
4877         return -ETIMEDOUT;
4878 }
4879
/* IP-block check_soft_reset hook.
 * Decode GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS and compute which GRBM and
 * SRBM soft-reset bits would be needed to recover the engine.  The masks
 * are stashed in adev->gfx.{grbm,srbm}_soft_reset for the pre/soft/post
 * reset hooks.  Returns true when any reset is required.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		/* any stuck gfx pipeline unit -> reset CP and GFX via GRBM,
		 * plus GRBM itself via SRBM
		 */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any busy CP micro engine (fetcher/compute/gfx) resets all three */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
4941
/* Deactivate the hardware queue descriptor backing @ring.
 * Selects the ring's me/pipe/queue bank and, if the HQD is active,
 * requests a drain-then-deactivate dequeue (DEQUEUE_REQ=2) and busy-waits
 * up to adev->usec_timeout microseconds for ACTIVE to clear.
 * NOTE(review): the SRBM bank selection is not restored here - callers
 * appear to rely on a later vi_srbm_select(); confirm before reusing.
 */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int i;

	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		u32 tmp;
		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
				    DEQUEUE_REQ, 2);
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
	}
}
4961
4962 static int gfx_v8_0_pre_soft_reset(void *handle)
4963 {
4964         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4965         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4966
4967         if ((!adev->gfx.grbm_soft_reset) &&
4968             (!adev->gfx.srbm_soft_reset))
4969                 return 0;
4970
4971         grbm_soft_reset = adev->gfx.grbm_soft_reset;
4972         srbm_soft_reset = adev->gfx.srbm_soft_reset;
4973
4974         /* stop the rlc */
4975         gfx_v8_0_rlc_stop(adev);
4976
4977         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4978             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4979                 /* Disable GFX parsing/prefetching */
4980                 gfx_v8_0_cp_gfx_enable(adev, false);
4981
4982         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4983             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
4984             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
4985             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
4986                 int i;
4987
4988                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4989                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4990
4991                         gfx_v8_0_inactive_hqd(adev, ring);
4992                 }
4993                 /* Disable MEC parsing/prefetching */
4994                 gfx_v8_0_cp_compute_enable(adev, false);
4995         }
4996
4997        return 0;
4998 }
4999
/* IP-block soft_reset hook.
 * Pulse the GRBM/SRBM soft-reset bits recorded by check_soft_reset while
 * GFX traffic into the memory controller is stalled via GMCON_DEBUG.
 * Each reset register is written with the bits asserted, read back, held
 * for 50us, then written with the bits deasserted and read back again.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall and clear GFX traffic in the memory controller */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* assert the GRBM reset bits; the read-back flushes the
		 * posted write before the hold delay
		 */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		/* deassert */
		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert/hold/deassert pulse for the SRBM bits */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the memory-controller stall */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5061
/* Reset @ring's HQD bookkeeping after a soft reset: clear any pending
 * dequeue request and zero the queue read/write pointers, under the
 * ring's me/pipe/queue SRBM selection (restored before returning).
 */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
}
5071
/* IP-block post_soft_reset hook.
 * Mirror of pre_soft_reset: after the reset pulse, resume the GFX CP if
 * its bits were reset, re-init every compute HQD and resume the compute
 * CP if any compute bit was reset, then restart the RLC.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_init_hqd(adev, ring);
		}
		gfx_v8_0_cp_compute_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5105
5106 /**
5107  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5108  *
5109  * @adev: amdgpu_device pointer
5110  *
5111  * Fetches a GPU clock counter snapshot.
5112  * Returns the 64 bit clock counter snapshot.
5113  */
5114 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5115 {
5116         uint64_t clock;
5117
5118         mutex_lock(&adev->gfx.gpu_clock_mutex);
5119         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5120         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5121                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5122         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5123         return clock;
5124 }
5125
/* Emit PM4 WRITE_DATA packets that program the per-VMID GDS memory,
 * GWS and OA allocation registers for the upcoming submission.  The
 * byte/resource counts are first converted to the registers' block
 * granularity via the AMDGPU_*_SHIFT constants.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size in the upper field, base in the lower */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5173
5174 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5175 {
5176         WREG32(mmSQ_IND_INDEX,
5177                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5178                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5179                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5180                 (SQ_IND_INDEX__FORCE_READ_MASK));
5181         return RREG32(mmSQ_IND_DATA);
5182 }
5183
5184 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5185                            uint32_t wave, uint32_t thread,
5186                            uint32_t regno, uint32_t num, uint32_t *out)
5187 {
5188         WREG32(mmSQ_IND_INDEX,
5189                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5190                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5191                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5192                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5193                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5194                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5195         while (num--)
5196                 *(out++) = RREG32(mmSQ_IND_DATA);
5197 }
5198
5199 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5200 {
5201         /* type 0 wave data */
5202         dst[(*no_fields)++] = 0;
5203         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5204         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5205         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5206         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5207         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5208         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5209         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5210         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5211         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5212         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5213         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5214         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5215         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5216         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5217         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5218         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5219         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5220         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5221 }
5222
5223 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5224                                      uint32_t wave, uint32_t start,
5225                                      uint32_t size, uint32_t *dst)
5226 {
5227         wave_read_regs(
5228                 adev, simd, wave, 0,
5229                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5230 }
5231
5232
5233 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5234         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5235         .select_se_sh = &gfx_v8_0_select_se_sh,
5236         .read_wave_data = &gfx_v8_0_read_wave_data,
5237         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5238 };
5239
/* IP-block early_init hook: set the ring counts and install the gfx,
 * ring, irq, GDS and RLC function tables before any hardware is touched.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5254
/* IP-block late_init hook: take the privileged-register/instruction
 * interrupt references, run the EDC GPR workarounds (which need the IB
 * pool, hence "late"), then enable GFX power gating.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5278
5279 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5280                                                        bool enable)
5281 {
5282         if (adev->asic_type == CHIP_POLARIS11)
5283                 /* Send msg to SMU via Powerplay */
5284                 amdgpu_set_powergating_state(adev,
5285                                              AMD_IP_BLOCK_TYPE_SMC,
5286                                              enable ?
5287                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5288
5289         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5290 }
5291
/* Enable/disable dynamic per-CU (medium grain) GFX power gating. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5297
/* Enable/disable quick medium-grain GFX power gating (Polaris11). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5303
/* Enable/disable coarse-grain GFX power gating (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5309
/* Enable/disable GFX pipeline power gating (Carrizo/Stoney). */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5319
5320 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5321                                           bool enable)
5322 {
5323         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5324                 cz_enable_gfx_cg_power_gating(adev, true);
5325                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5326                         cz_enable_gfx_pipeline_power_gating(adev, true);
5327         } else {
5328                 cz_enable_gfx_cg_power_gating(adev, false);
5329                 cz_enable_gfx_pipeline_power_gating(adev, false);
5330         }
5331 }
5332
5333 static int gfx_v8_0_set_powergating_state(void *handle,
5334                                           enum amd_powergating_state state)
5335 {
5336         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5337         bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5338
5339         switch (adev->asic_type) {
5340         case CHIP_CARRIZO:
5341         case CHIP_STONEY:
5342
5343                 cz_update_gfx_cg_power_gating(adev, enable);
5344
5345                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5346                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5347                 else
5348                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5349
5350                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5351                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5352                 else
5353                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5354                 break;
5355         case CHIP_POLARIS11:
5356                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5357                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5358                 else
5359                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5360
5361                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5362                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5363                 else
5364                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5365
5366                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5367                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5368                 else
5369                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5370                 break;
5371         default:
5372                 break;
5373         }
5374
5375         return 0;
5376 }
5377
/* Broadcast a BPM serdes command to all CUs.
 * Selects every SE/SH, targets all CU and non-CU masters, then builds
 * RLC_SERDES_WR_CTRL: the command/select/control fields are cleared
 * (Stoney's register lacks the BPM_DATA/REG_ADDR fields, hence the two
 * mask sets) and @cmd/@reg_addr are written with a broadcast BPM address.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5418
/* RLC safe-mode handshake: message codes and the RLC_GPR_REG2 request/
 * message bit fields used by the CZ/ST safe-mode helpers below.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5425
/* Request RLC safe mode on Carrizo/Stoney so CG/PG state can be changed.
 * No-op when the RLC F32 core is disabled or when no CG/PG feature that
 * needs the handshake is enabled.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* NOTE(review): 'data' still holds the RLC_CNTL value here,
		 * so its stray bits go into the RLC_GPR_REG2 write together
		 * with REQ/MESSAGE - confirm this is intentional.
		 */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait until both gfx clock and power report as on */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge (clear) the request bit */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5461
/*
 * cz_exit_rlc_safe_mode - release RLC safe mode via RLC_GPR_REG2.
 * @adev: amdgpu device
 *
 * Mirror of cz_enter_rlc_safe_mode(): sends the EXIT message when any
 * GFX CG/PG feature is active, then polls for the RLC to clear REQ.
 * Note the final poll runs unconditionally, even when no request was
 * actually written above.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	/* nothing to release if the RLC ucode is not running */
	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* set REQ with the EXIT message in the MESSAGE field */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the RLC to ack by clearing the REQ bit */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
			break;
		udelay(1);
	}
}
5487
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode via RLC_SAFE_MODE.
 * @adev: amdgpu device
 *
 * Same handshake idea as the cz_* variant, but through the dedicated
 * RLC_SAFE_MODE register (CMD/MESSAGE fields) and gated only on the
 * CGCG/MGCG clock-gating flags, not the power-gating flags.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	/* safe mode is meaningless unless the RLC ucode is running */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* CMD bit + message value 1 = enter safe mode */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait until both GFX clock and power status bits report set */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack by clearing the CMD bit */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5521
/*
 * iceland_exit_rlc_safe_mode - release RLC safe mode via RLC_SAFE_MODE.
 * @adev: amdgpu device
 *
 * Only writes the release command if safe mode was actually entered
 * (in_safe_mode tracked by iceland_enter_rlc_safe_mode).  Message value
 * 0 = exit.  The final poll for CMD-clear runs unconditionally.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	/* nothing to release if the RLC ucode is not running */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* CMD bit + message 0 (exit) */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to ack by clearing the CMD bit */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5546
/* No-op safe-mode entry: only records the state flag — presumably for
 * ASICs where no RLC handshake is needed (TODO confirm against users of
 * gfx_v8_0_nop_rlc_funcs). */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
5551
/* No-op safe-mode exit: counterpart of gfx_v8_0_nop_enter_rlc_safe_mode. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5556
/* RLC safe-mode hooks using the RLC_GPR_REG2 handshake (cz variant) */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};
5561
/* RLC safe-mode hooks using the RLC_SAFE_MODE register (iceland variant) */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5566
/* RLC safe-mode hooks that only track state, performing no handshake */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5571
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS.
 * @adev: amdgpu device
 * @enable: true to enable medium-grain clock gating, false to disable
 *
 * Runs the whole numbered hardware sequence under RLC safe mode.
 * Register writes are skipped whenever the computed value equals what
 * was read back (temp != data checks) to avoid redundant MMIO traffic.
 * Enable order: light-sleep enables, clear MGCG override (register and
 * serdes), enable CGTS; disable order is the reverse with overrides set.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override bit set; dGPUs clear it too */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* drop LS override only when both MGLS and CGTS_LS are on */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5675
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS.
 * @adev: amdgpu device
 * @enable: true to enable coarse-grain clock gating, false to disable
 *
 * Runs under RLC safe mode.  Enable path: clear the CGCG (and, with
 * CGLS support, CGLS) override bits, push the matching serdes commands,
 * set the enable bits in RLC_CGCG_CGLS_CTRL and re-enable GUI idle
 * interrupts.  Disable path mirrors this with overrides set and both
 * enable bits cleared.  Writes are skipped when the value is unchanged.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear cgcg override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5766 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5767                                             bool enable)
5768 {
5769         if (enable) {
5770                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5771                  * ===  MGCG + MGLS + TS(CG/LS) ===
5772                  */
5773                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5774                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5775         } else {
5776                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5777                  * ===  CGCG + CGLS ===
5778                  */
5779                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5780                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5781         }
5782         return 0;
5783 }
5784
5785 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5786                                           enum amd_clockgating_state state)
5787 {
5788         uint32_t msg_id, pp_state = 0;
5789         uint32_t pp_support_state = 0;
5790         void *pp_handle = adev->powerplay.pp_handle;
5791
5792         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5793                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5794                         pp_support_state = PP_STATE_SUPPORT_LS;
5795                         pp_state = PP_STATE_LS;
5796                 }
5797                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5798                         pp_support_state |= PP_STATE_SUPPORT_CG;
5799                         pp_state |= PP_STATE_CG;
5800                 }
5801                 if (state == AMD_CG_STATE_UNGATE)
5802                         pp_state = 0;
5803
5804                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5805                                 PP_BLOCK_GFX_CG,
5806                                 pp_support_state,
5807                                 pp_state);
5808                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5809         }
5810
5811         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5812                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5813                         pp_support_state = PP_STATE_SUPPORT_LS;
5814                         pp_state = PP_STATE_LS;
5815                 }
5816
5817                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5818                         pp_support_state |= PP_STATE_SUPPORT_CG;
5819                         pp_state |= PP_STATE_CG;
5820                 }
5821
5822                 if (state == AMD_CG_STATE_UNGATE)
5823                         pp_state = 0;
5824
5825                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5826                                 PP_BLOCK_GFX_MG,
5827                                 pp_support_state,
5828                                 pp_state);
5829                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5830         }
5831
5832         return 0;
5833 }
5834
5835 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5836                                           enum amd_clockgating_state state)
5837 {
5838
5839         uint32_t msg_id, pp_state = 0;
5840         uint32_t pp_support_state = 0;
5841         void *pp_handle = adev->powerplay.pp_handle;
5842
5843         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5844                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5845                         pp_support_state = PP_STATE_SUPPORT_LS;
5846                         pp_state = PP_STATE_LS;
5847                 }
5848                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5849                         pp_support_state |= PP_STATE_SUPPORT_CG;
5850                         pp_state |= PP_STATE_CG;
5851                 }
5852                 if (state == AMD_CG_STATE_UNGATE)
5853                         pp_state = 0;
5854
5855                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5856                                 PP_BLOCK_GFX_CG,
5857                                 pp_support_state,
5858                                 pp_state);
5859                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5860         }
5861
5862         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5863                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5864                         pp_support_state = PP_STATE_SUPPORT_LS;
5865                         pp_state = PP_STATE_LS;
5866                 }
5867                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5868                         pp_support_state |= PP_STATE_SUPPORT_CG;
5869                         pp_state |= PP_STATE_CG;
5870                 }
5871                 if (state == AMD_CG_STATE_UNGATE)
5872                         pp_state = 0;
5873
5874                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5875                                 PP_BLOCK_GFX_3D,
5876                                 pp_support_state,
5877                                 pp_state);
5878                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5879         }
5880
5881         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5882                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5883                         pp_support_state = PP_STATE_SUPPORT_LS;
5884                         pp_state = PP_STATE_LS;
5885                 }
5886
5887                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5888                         pp_support_state |= PP_STATE_SUPPORT_CG;
5889                         pp_state |= PP_STATE_CG;
5890                 }
5891
5892                 if (state == AMD_CG_STATE_UNGATE)
5893                         pp_state = 0;
5894
5895                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5896                                 PP_BLOCK_GFX_MG,
5897                                 pp_support_state,
5898                                 pp_state);
5899                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5900         }
5901
5902         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5903                 pp_support_state = PP_STATE_SUPPORT_LS;
5904
5905                 if (state == AMD_CG_STATE_UNGATE)
5906                         pp_state = 0;
5907                 else
5908                         pp_state = PP_STATE_LS;
5909
5910                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5911                                 PP_BLOCK_GFX_RLC,
5912                                 pp_support_state,
5913                                 pp_state);
5914                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5915         }
5916
5917         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5918                 pp_support_state = PP_STATE_SUPPORT_LS;
5919
5920                 if (state == AMD_CG_STATE_UNGATE)
5921                         pp_state = 0;
5922                 else
5923                         pp_state = PP_STATE_LS;
5924                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5925                         PP_BLOCK_GFX_CP,
5926                         pp_support_state,
5927                         pp_state);
5928                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5929         }
5930
5931         return 0;
5932 }
5933
5934 static int gfx_v8_0_set_clockgating_state(void *handle,
5935                                           enum amd_clockgating_state state)
5936 {
5937         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5938
5939         switch (adev->asic_type) {
5940         case CHIP_FIJI:
5941         case CHIP_CARRIZO:
5942         case CHIP_STONEY:
5943                 gfx_v8_0_update_gfx_clock_gating(adev,
5944                                                  state == AMD_CG_STATE_GATE ? true : false);
5945                 break;
5946         case CHIP_TONGA:
5947                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5948                 break;
5949         case CHIP_POLARIS10:
5950         case CHIP_POLARIS11:
5951                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5952                 break;
5953         default:
5954                 break;
5955         }
5956         return 0;
5957 }
5958
/* Return the ring's read pointer from its writeback slot. */
static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}
5963
5964 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5965 {
5966         struct amdgpu_device *adev = ring->adev;
5967
5968         if (ring->use_doorbell)
5969                 /* XXX check if swapping is necessary on BE */
5970                 return ring->adev->wb.wb[ring->wptr_offs];
5971         else
5972                 return RREG32(mmCP_RB0_WPTR);
5973 }
5974
5975 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5976 {
5977         struct amdgpu_device *adev = ring->adev;
5978
5979         if (ring->use_doorbell) {
5980                 /* XXX check if swapping is necessary on BE */
5981                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5982                 WDOORBELL32(ring->doorbell_index, ring->wptr);
5983         } else {
5984                 WREG32(mmCP_RB0_WPTR, ring->wptr);
5985                 (void)RREG32(mmCP_RB0_WPTR);
5986         }
5987 }
5988
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring.
 * @ring: target ring
 *
 * Emits a 7-dword WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ
 * and waits for the matching ref/mask bit in GPU_HDP_FLUSH_DONE.
 * Compute rings select the CP bit for their ME/pipe; an unrecognized
 * ME emits nothing.  The gfx ring waits on the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* no flush-done bit defined for this ME */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6020
/*
 * gfx_v8_0_ring_emit_vgt_flush - emit a VGT flush on the ring.
 *
 * Two EVENT_WRITE packets: VS_PARTIAL_FLUSH first, then VGT_FLUSH.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6031
6032
/*
 * gfx_v8_0_ring_emit_hdp_invalidate - emit an HDP cache invalidate.
 *
 * WRITE_DATA of 1 to mmHDP_DEBUG0 with write-confirm; per the function
 * name this write triggers the HDP invalidate.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
6044
/*
 * gfx_v8_0_ring_emit_ib_gfx - schedule an indirect buffer on the gfx ring.
 * @ring: target ring
 * @ib: indirect buffer to execute
 * @vm_id: VM id (placed in bits 24+ of the control word)
 * @ctx_switch: unused here
 *
 * CE IBs use INDIRECT_BUFFER_CONST, others plain INDIRECT_BUFFER.  On
 * big-endian the swap field (2) is set in the address dword.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6067
/*
 * gfx_v8_0_ring_emit_ib_compute - schedule an indirect buffer on a
 * compute ring.
 *
 * Like the gfx variant but always uses INDIRECT_BUFFER and additionally
 * sets INDIRECT_BUFFER_VALID in the control word.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6083
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring.
 * @addr: GPU address the sequence number is written to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT requests an interrupt
 *
 * Emits EVENT_WRITE_EOP with TC/TCL1 flush + TC writeback, then the
 * destination address and the 64-bit sequence value.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6104
/*
 * gfx_v8_0_ring_emit_pipeline_sync - wait for previously emitted fences.
 *
 * Emits WAIT_REG_MEM polling the ring's own fence memory until it equals
 * the last emitted sync_seq.  Uses the PFP engine on gfx rings and the
 * ME on compute rings (compute has no PFP).
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6121
/*
 * gfx_v8_0_ring_emit_vm_flush - emit a VM TLB flush for one VMID.
 * @ring: target ring
 * @vm_id: VM context id (0-15)
 * @pd_addr: page directory base address (written as pd_addr >> 12)
 *
 * Writes the page-table base for the VMID (contexts 0-7 and 8-15 live
 * in separate register banks), requests the invalidate via
 * VM_INVALIDATE_REQUEST, then emits a wait packet.  On gfx rings the
 * PFP is synced to the ME afterwards and 128 nops are inserted so the
 * CE cannot touch the VM before the flush completes.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
		amdgpu_ring_insert_nop(ring, 128);
	}
}
6170
6171 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6172 {
6173         return ring->adev->wb.wb[ring->wptr_offs];
6174 }
6175
6176 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6177 {
6178         struct amdgpu_device *adev = ring->adev;
6179
6180         /* XXX check if swapping is necessary on BE */
6181         adev->wb.wb[ring->wptr_offs] = ring->wptr;
6182         WDOORBELL32(ring->doorbell_index, ring->wptr);
6183 }
6184
/**
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 *
 * @ring: compute ring to emit on
 * @addr: GPU address the fence value is written to
 * @seq: fence sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_* bits (64-bit write, interrupt request)
 *
 * Uses RELEASE_MEM (compute rings cannot use EVENT_WRITE_EOP) to flush
 * caches, write the fence value and optionally raise an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL: 1 = 32-bit value, 2 = 64-bit; INT_SEL 2 = int on confirm */
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6205
/* Emit a SWITCH_BUFFER packet so the CE flips to the other CE RAM buffer. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6211
6212 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6213 {
6214         uint32_t dw2 = 0;
6215
6216         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6217         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6218                 gfx_v8_0_ring_emit_vgt_flush(ring);
6219                 /* set load_global_config & load_global_uconfig */
6220                 dw2 |= 0x8001;
6221                 /* set load_cs_sh_regs */
6222                 dw2 |= 0x01000000;
6223                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6224                 dw2 |= 0x10002;
6225
6226                 /* set load_ce_ram if preamble presented */
6227                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6228                         dw2 |= 0x10000000;
6229         } else {
6230                 /* still load_ce_ram if this is the first time preamble presented
6231                  * although there is no context switch happens.
6232                  */
6233                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6234                         dw2 |= 0x10000000;
6235         }
6236
6237         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6238         amdgpu_ring_write(ring, dw2);
6239         amdgpu_ring_write(ring, 0);
6240 }
6241
6242 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6243                                                  enum amdgpu_interrupt_state state)
6244 {
6245         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6246                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6247 }
6248
6249 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6250                                                      int me, int pipe,
6251                                                      enum amdgpu_interrupt_state state)
6252 {
6253         /*
6254          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6255          * handles the setting of interrupts for this specific pipe. All other
6256          * pipes' interrupts are set by amdkfd.
6257          */
6258
6259         if (me == 1) {
6260                 switch (pipe) {
6261                 case 0:
6262                         break;
6263                 default:
6264                         DRM_DEBUG("invalid pipe %d\n", pipe);
6265                         return;
6266                 }
6267         } else {
6268                 DRM_DEBUG("invalid me %d\n", me);
6269                 return;
6270         }
6271
6272         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6273                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6274 }
6275
6276 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6277                                              struct amdgpu_irq_src *source,
6278                                              unsigned type,
6279                                              enum amdgpu_interrupt_state state)
6280 {
6281         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6282                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6283
6284         return 0;
6285 }
6286
6287 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6288                                               struct amdgpu_irq_src *source,
6289                                               unsigned type,
6290                                               enum amdgpu_interrupt_state state)
6291 {
6292         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6293                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6294
6295         return 0;
6296 }
6297
/**
 * gfx_v8_0_set_eop_interrupt_state - route an EOP interrupt state change
 *
 * @adev: amdgpu device
 * @src: the EOP interrupt source
 * @type: AMDGPU_CP_IRQ_* identifying which ring/pipe
 * @state: enable or disable
 *
 * Dispatches to the GFX handler or to the compute handler with the
 * (me, pipe) pair encoded in @type.  Unknown types are silently
 * ignored.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6336
/**
 * gfx_v8_0_eop_irq - process an end-of-pipe interrupt
 *
 * @adev: amdgpu device
 * @source: the interrupt source that fired
 * @entry: decoded interrupt vector entry
 *
 * Decodes me/pipe/queue from entry->ring_id and runs fence processing
 * on the matching ring.  Always returns 0.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id layout: [6:4] queue, [3:2] me, [1:0] pipe */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		/* me 0 is the GFX engine; there is a single gfx ring */
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		/* me 1/2 are the compute MECs */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6368
/* Handle a privileged-register-access fault: log it and schedule a GPU
 * reset from process context.  Always returns 0. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6377
/* Handle a privileged-instruction fault: log it and schedule a GPU
 * reset from process context.  Always returns 0. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6386
/* IP-block lifecycle callbacks for the GFX v8 block (init/fini,
 * suspend/resume, idle checks, soft reset and gating control). */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6406
/* Ring callbacks for the GFX ring.  emit_frame_size is the worst-case
 * dword count per frame and must stay in sync with the emit_* helpers
 * it itemizes below. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
		2 + /* gfx_v8_ring_emit_sb */
		3 + 4, /* gfx_v8_ring_emit_cntxcntl including vgt flush */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
};
6438
/* Ring callbacks for the compute (MEC) rings.  Differs from the GFX
 * table in the wptr handling (writeback + doorbell), the RELEASE_MEM
 * fence, and a smaller vm_flush budget (no PFP sync, no 128-dw nop). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6466
6467 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6468 {
6469         int i;
6470
6471         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6472                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6473
6474         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6475                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6476 }
6477
/* End-of-pipe interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6482
/* Privileged-register fault interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6487
/* Privileged-instruction fault interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6492
/* Register the GFX block's interrupt sources with their type counts. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* one type per AMDGPU_CP_IRQ_* EOP source */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
6504
6505 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6506 {
6507         switch (adev->asic_type) {
6508         case CHIP_TOPAZ:
6509                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6510                 break;
6511         case CHIP_STONEY:
6512         case CHIP_CARRIZO:
6513                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6514                 break;
6515         default:
6516                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6517                 break;
6518         }
6519 }
6520
/**
 * gfx_v8_0_set_gds_init - initialize GDS/GWS/OA partition sizes
 *
 * @adev: amdgpu device
 *
 * Reads the total GDS memory size from hardware and picks per-partition
 * sizes for the gfx and CS partitions; parts with 64KB of GDS get a
 * different split than the others.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
6548
6549 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6550                                                  u32 bitmap)
6551 {
6552         u32 data;
6553
6554         if (!bitmap)
6555                 return;
6556
6557         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6558         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6559
6560         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6561 }
6562
/* Return the active-CU bitmap for the currently selected SE/SH by
 * combining the fuse (CC) and user (GC_USER) inactive masks, inverting,
 * and clamping to max_cu_per_sh bits. */
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	/* both registers use the same INACTIVE_CUS field layout */
	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
6574
/**
 * gfx_v8_0_get_cu_info - gather per-SE/SH compute unit topology
 *
 * @adev: amdgpu device
 *
 * Walks every shader engine / shader array under grbm_idx_mutex,
 * applies any user CU-disable masks, records each array's active-CU
 * bitmap, and accumulates the total active CU count plus an
 * "always-on" CU mask (up to 2 CUs per array) in adev->gfx.cu_info.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	/* user disable masks cover at most 4 SEs x 2 SHs */
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	/* serialize GRBM_GFX_INDEX bank selection */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; the first 2 form the AO set */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore broadcast mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
6617
/* GFX 8.0 IP block descriptor, referenced by the SoC setup code. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
6626
/* GFX 8.1 IP block descriptor; shares the 8.0 callback table. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};