/* drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
/* Ring topology for GFX8: one graphics ring plus eight compute (MEC) rings. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Per-ASIC golden values for mmGB_ADDR_CONFIG; the same values appear in the
 * *_golden_common_all tables below.
 */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Field-encoding helpers: shift a value into its bit position within the
 * GB_TILE_MODE0 / GB_MACROTILE_MODE0 register layouts (shift amounts come
 * from the gca/gfx_8_0_sh_mask.h header).
 */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Local bit definitions for the RLC_CGTT_MGCG_OVERRIDE register
 * (clock-gating override controls handled by the RLC firmware).
 * NOTE(review): defined here rather than taken from a sh_mask header —
 * presumably not present in the generated headers; confirm against the
 * register spec before relying on them elsewhere.
 */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set vs. clear operation selectors */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

/* Number of entries in the RLC "direct register list" format
 * (format identifier used when parsing RLC firmware register lists).
 */
#define RLC_FormatDirectRegListLength        14
94
/* Firmware images this driver may request at runtime, one set per supported
 * VI-family ASIC: CE (constant engine), PFP (pre-fetch parser), ME (micro
 * engine), MEC/MEC2 (compute micro engines) and RLC (run-list controller).
 * MODULE_FIRMWARE() records these names in module metadata so userspace
 * tooling can bundle them; note Stoney and Topaz ship no mec2 image.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
141
/* Per-VMID GDS register offsets, indexed by VMID (0-15). Each entry holds
 * the {BASE, SIZE, GWS, OA} register offsets for that VMID's GDS partition.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
161
/* Tonga A11 golden register settings. Flat list of {register offset,
 * AND mask, OR value} triplets, applied via
 * amdgpu_program_register_sequence() in gfx_v8_0_init_golden_registers().
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
181
/* Tonga common golden settings ({offset, mask, value} triplets): raster
 * config and SPI CU reservation defaults; GB_ADDR_CONFIG matches
 * TONGA_GB_ADDR_CONFIG_GOLDEN (0x22011003).
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
193
/* Tonga medium-grain / coarse-grain clock-gating (MGCG/CGCG) init sequence.
 * {offset, mask, value} triplets: first forces the MGCG override and
 * broadcasts GRBM_GFX_INDEX, then programs per-block CGTT clock controls,
 * then per-CU CGTS delay pairs, and finally the SM control / CGCG-CGLS
 * enables. Order matters — do not reorder entries.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
272
/* Polaris11 A11 golden register settings ({offset, mask, value} triplets),
 * applied via amdgpu_program_register_sequence().
 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
293
/* Polaris11 common golden settings ({offset, mask, value} triplets);
 * GB_ADDR_CONFIG matches POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002).
 */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
303
/* Polaris10 A11 golden register settings ({offset, mask, value} triplets),
 * applied via amdgpu_program_register_sequence().
 */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
324
/* Polaris10 common golden settings ({offset, mask, value} triplets);
 * raster config plus SPI CU reservation defaults.
 */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
336
/* Fiji common golden settings ({offset, mask, value} triplets). Note the
 * second GRBM_GFX_INDEX broadcast before SPI_CONFIG_CNTL_1 is intentional.
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
350
/* Fiji A10 golden register settings ({offset, mask, value} triplets),
 * applied via amdgpu_program_register_sequence().
 */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
365
/* Fiji MGCG/CGCG clock-gating init sequence ({offset, mask, value}
 * triplets). Same structure as the Tonga table but without the per-CU
 * CGTS delay entries. Order matters — do not reorder.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
404
/* Iceland (Topaz) A11 golden register settings ({offset, mask, value}
 * triplets), applied via amdgpu_program_register_sequence() in
 * gfx_v8_0_init_golden_registers() for CHIP_TOPAZ.
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
424
/* Iceland (Topaz) common golden settings ({offset, mask, value} triplets);
 * GB_ADDR_CONFIG matches TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
436
/* Iceland (Topaz) MGCG/CGCG clock-gating init sequence ({offset, mask,
 * value} triplets), applied in gfx_v8_0_init_golden_registers() for
 * CHIP_TOPAZ. Covers CUs 0-5 only, and unlike the Tonga/Fiji/CZ tables
 * has no trailing CP_MEM_SLP_CNTL entry. Order matters — do not reorder.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
504
/* Carrizo (CZ) A11 golden register settings ({offset, mask, value}
 * triplets), applied via amdgpu_program_register_sequence().
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
520
/* Carrizo (CZ) common golden settings ({offset, mask, value} triplets);
 * GB_ADDR_CONFIG matches CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
532
/* Carrizo (CZ) MGCG/CGCG clock-gating init sequence ({offset, mask, value}
 * triplets): MGCG override and GRBM broadcast, per-block CGTT clock
 * controls, per-CU CGTS delays (CUs 0-7), then SM control and CGCG/CGLS
 * enables (0x0020003f here vs. 0x0020003c on Tonga/Fiji/Iceland).
 * Order matters — do not reorder entries.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
611
/* Stoney A11 golden register settings ({offset, mask, value} triplets),
 * applied via amdgpu_program_register_sequence().
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
625
/* Stoney common golden settings ({offset, mask, value} triplets). Note
 * GB_ADDR_CONFIG is 0x12010001 — no matching *_GOLDEN define above.
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
637
/*
 * Stoney medium-grain / coarse-grain clock-gating init values
 * (RLC CGCG/CGLS control, CP/RLC memory light sleep, CGTS SM control).
 * Same (offset, mask, value) table format as the other golden arrays.
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
646
647 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
648 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
649 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
650 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
651 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
652 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
653
/**
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 * @adev: amdgpu device pointer
 *
 * Applies the ASIC-specific clock-gating init tables, golden settings and
 * common-register tables defined above via
 * amdgpu_program_register_sequence().  Unknown ASICs are silently skipped.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/*
		 * Board-specific workaround for three Polaris10 rev 0xc7
		 * boards (AMD 0xb37, ASUS 0x4a8, XFX 0x9480): two extra
		 * writes over ATOM i2c channel 0x10 to device 0x96.
		 * NOTE(review): magic values come from the vendor; exact
		 * purpose is not visible here — do not alter.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
741
742 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
743 {
744         int i;
745
746         adev->gfx.scratch.num_reg = 7;
747         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
748         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
749                 adev->gfx.scratch.free[i] = true;
750                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
751         }
752 }
753
754 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
755 {
756         struct amdgpu_device *adev = ring->adev;
757         uint32_t scratch;
758         uint32_t tmp = 0;
759         unsigned i;
760         int r;
761
762         r = amdgpu_gfx_scratch_get(adev, &scratch);
763         if (r) {
764                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
765                 return r;
766         }
767         WREG32(scratch, 0xCAFEDEAD);
768         r = amdgpu_ring_alloc(ring, 3);
769         if (r) {
770                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
771                           ring->idx, r);
772                 amdgpu_gfx_scratch_free(adev, scratch);
773                 return r;
774         }
775         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
776         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
777         amdgpu_ring_write(ring, 0xDEADBEEF);
778         amdgpu_ring_commit(ring);
779
780         for (i = 0; i < adev->usec_timeout; i++) {
781                 tmp = RREG32(scratch);
782                 if (tmp == 0xDEADBEEF)
783                         break;
784                 DRM_UDELAY(1);
785         }
786         if (i < adev->usec_timeout) {
787                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
788                          ring->idx, i);
789         } else {
790                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
791                           ring->idx, scratch, tmp);
792                 r = -EINVAL;
793         }
794         amdgpu_gfx_scratch_free(adev, scratch);
795         return r;
796 }
797
798 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
799 {
800         struct amdgpu_device *adev = ring->adev;
801         struct amdgpu_ib ib;
802         struct dma_fence *f = NULL;
803         uint32_t scratch;
804         uint32_t tmp = 0;
805         long r;
806
807         r = amdgpu_gfx_scratch_get(adev, &scratch);
808         if (r) {
809                 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
810                 return r;
811         }
812         WREG32(scratch, 0xCAFEDEAD);
813         memset(&ib, 0, sizeof(ib));
814         r = amdgpu_ib_get(adev, NULL, 256, &ib);
815         if (r) {
816                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
817                 goto err1;
818         }
819         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
820         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
821         ib.ptr[2] = 0xDEADBEEF;
822         ib.length_dw = 3;
823
824         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
825         if (r)
826                 goto err2;
827
828         r = dma_fence_wait_timeout(f, false, timeout);
829         if (r == 0) {
830                 DRM_ERROR("amdgpu: IB test timed out.\n");
831                 r = -ETIMEDOUT;
832                 goto err2;
833         } else if (r < 0) {
834                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
835                 goto err2;
836         }
837         tmp = RREG32(scratch);
838         if (tmp == 0xDEADBEEF) {
839                 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
840                 r = 0;
841         } else {
842                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
843                           scratch, tmp);
844                 r = -EINVAL;
845         }
846 err2:
847         amdgpu_ib_free(adev, &ib, NULL);
848         dma_fence_put(f);
849 err1:
850         amdgpu_gfx_scratch_free(adev, scratch);
851         return r;
852 }
853
854
855 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
856         release_firmware(adev->gfx.pfp_fw);
857         adev->gfx.pfp_fw = NULL;
858         release_firmware(adev->gfx.me_fw);
859         adev->gfx.me_fw = NULL;
860         release_firmware(adev->gfx.ce_fw);
861         adev->gfx.ce_fw = NULL;
862         release_firmware(adev->gfx.rlc_fw);
863         adev->gfx.rlc_fw = NULL;
864         release_firmware(adev->gfx.mec_fw);
865         adev->gfx.mec_fw = NULL;
866         if ((adev->asic_type != CHIP_STONEY) &&
867             (adev->asic_type != CHIP_TOPAZ))
868                 release_firmware(adev->gfx.mec2_fw);
869         adev->gfx.mec2_fw = NULL;
870
871         kfree(adev->gfx.rlc.register_list_format);
872 }
873
874 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
875 {
876         const char *chip_name;
877         char fw_name[30];
878         int err;
879         struct amdgpu_firmware_info *info = NULL;
880         const struct common_firmware_header *header = NULL;
881         const struct gfx_firmware_header_v1_0 *cp_hdr;
882         const struct rlc_firmware_header_v2_0 *rlc_hdr;
883         unsigned int *tmp = NULL, i;
884
885         DRM_DEBUG("\n");
886
887         switch (adev->asic_type) {
888         case CHIP_TOPAZ:
889                 chip_name = "topaz";
890                 break;
891         case CHIP_TONGA:
892                 chip_name = "tonga";
893                 break;
894         case CHIP_CARRIZO:
895                 chip_name = "carrizo";
896                 break;
897         case CHIP_FIJI:
898                 chip_name = "fiji";
899                 break;
900         case CHIP_POLARIS11:
901                 chip_name = "polaris11";
902                 break;
903         case CHIP_POLARIS10:
904                 chip_name = "polaris10";
905                 break;
906         case CHIP_STONEY:
907                 chip_name = "stoney";
908                 break;
909         default:
910                 BUG();
911         }
912
913         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
914         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
915         if (err)
916                 goto out;
917         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
918         if (err)
919                 goto out;
920         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
921         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
922         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
923
924         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
925         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
926         if (err)
927                 goto out;
928         err = amdgpu_ucode_validate(adev->gfx.me_fw);
929         if (err)
930                 goto out;
931         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
932         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
933         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
934
935         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
936         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
937         if (err)
938                 goto out;
939         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
940         if (err)
941                 goto out;
942         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
943         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
944         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
945
946         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
947         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
948         if (err)
949                 goto out;
950         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
951         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
952         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
953         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
954
955         adev->gfx.rlc.save_and_restore_offset =
956                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
957         adev->gfx.rlc.clear_state_descriptor_offset =
958                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
959         adev->gfx.rlc.avail_scratch_ram_locations =
960                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
961         adev->gfx.rlc.reg_restore_list_size =
962                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
963         adev->gfx.rlc.reg_list_format_start =
964                         le32_to_cpu(rlc_hdr->reg_list_format_start);
965         adev->gfx.rlc.reg_list_format_separate_start =
966                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
967         adev->gfx.rlc.starting_offsets_start =
968                         le32_to_cpu(rlc_hdr->starting_offsets_start);
969         adev->gfx.rlc.reg_list_format_size_bytes =
970                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
971         adev->gfx.rlc.reg_list_size_bytes =
972                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
973
974         adev->gfx.rlc.register_list_format =
975                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
976                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
977
978         if (!adev->gfx.rlc.register_list_format) {
979                 err = -ENOMEM;
980                 goto out;
981         }
982
983         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
984                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
985         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
986                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
987
988         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
989
990         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
991                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
992         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
993                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
994
995         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
996         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
997         if (err)
998                 goto out;
999         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1000         if (err)
1001                 goto out;
1002         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1003         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1004         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1005
1006         if ((adev->asic_type != CHIP_STONEY) &&
1007             (adev->asic_type != CHIP_TOPAZ)) {
1008                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1009                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1010                 if (!err) {
1011                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1012                         if (err)
1013                                 goto out;
1014                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1015                                 adev->gfx.mec2_fw->data;
1016                         adev->gfx.mec2_fw_version =
1017                                 le32_to_cpu(cp_hdr->header.ucode_version);
1018                         adev->gfx.mec2_feature_version =
1019                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1020                 } else {
1021                         err = 0;
1022                         adev->gfx.mec2_fw = NULL;
1023                 }
1024         }
1025
1026         if (adev->firmware.smu_load) {
1027                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1028                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1029                 info->fw = adev->gfx.pfp_fw;
1030                 header = (const struct common_firmware_header *)info->fw->data;
1031                 adev->firmware.fw_size +=
1032                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1033
1034                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1035                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1036                 info->fw = adev->gfx.me_fw;
1037                 header = (const struct common_firmware_header *)info->fw->data;
1038                 adev->firmware.fw_size +=
1039                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1040
1041                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1042                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1043                 info->fw = adev->gfx.ce_fw;
1044                 header = (const struct common_firmware_header *)info->fw->data;
1045                 adev->firmware.fw_size +=
1046                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1047
1048                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1049                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1050                 info->fw = adev->gfx.rlc_fw;
1051                 header = (const struct common_firmware_header *)info->fw->data;
1052                 adev->firmware.fw_size +=
1053                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1054
1055                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1056                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1057                 info->fw = adev->gfx.mec_fw;
1058                 header = (const struct common_firmware_header *)info->fw->data;
1059                 adev->firmware.fw_size +=
1060                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1061
1062                 /* we need account JT in */
1063                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1064                 adev->firmware.fw_size +=
1065                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1066
1067                 if (amdgpu_sriov_vf(adev)) {
1068                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1069                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1070                         info->fw = adev->gfx.mec_fw;
1071                         adev->firmware.fw_size +=
1072                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1073                 }
1074
1075                 if (adev->gfx.mec2_fw) {
1076                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1077                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1078                         info->fw = adev->gfx.mec2_fw;
1079                         header = (const struct common_firmware_header *)info->fw->data;
1080                         adev->firmware.fw_size +=
1081                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1082                 }
1083
1084         }
1085
1086 out:
1087         if (err) {
1088                 dev_err(adev->dev,
1089                         "gfx8: Failed to load firmware \"%s\"\n",
1090                         fw_name);
1091                 release_firmware(adev->gfx.pfp_fw);
1092                 adev->gfx.pfp_fw = NULL;
1093                 release_firmware(adev->gfx.me_fw);
1094                 adev->gfx.me_fw = NULL;
1095                 release_firmware(adev->gfx.ce_fw);
1096                 adev->gfx.ce_fw = NULL;
1097                 release_firmware(adev->gfx.rlc_fw);
1098                 adev->gfx.rlc_fw = NULL;
1099                 release_firmware(adev->gfx.mec_fw);
1100                 adev->gfx.mec_fw = NULL;
1101                 release_firmware(adev->gfx.mec2_fw);
1102                 adev->gfx.mec2_fw = NULL;
1103         }
1104         return err;
1105 }
1106
1107 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1108                                     volatile u32 *buffer)
1109 {
1110         u32 count = 0, i;
1111         const struct cs_section_def *sect = NULL;
1112         const struct cs_extent_def *ext = NULL;
1113
1114         if (adev->gfx.rlc.cs_data == NULL)
1115                 return;
1116         if (buffer == NULL)
1117                 return;
1118
1119         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1120         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1121
1122         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1123         buffer[count++] = cpu_to_le32(0x80000000);
1124         buffer[count++] = cpu_to_le32(0x80000000);
1125
1126         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1127                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1128                         if (sect->id == SECT_CONTEXT) {
1129                                 buffer[count++] =
1130                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1131                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1132                                                 PACKET3_SET_CONTEXT_REG_START);
1133                                 for (i = 0; i < ext->reg_count; i++)
1134                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1135                         } else {
1136                                 return;
1137                         }
1138                 }
1139         }
1140
1141         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1142         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1143                         PACKET3_SET_CONTEXT_REG_START);
1144         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1145         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1146
1147         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1148         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1149
1150         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1151         buffer[count++] = cpu_to_le32(0);
1152 }
1153
1154 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1155 {
1156         const __le32 *fw_data;
1157         volatile u32 *dst_ptr;
1158         int me, i, max_me = 4;
1159         u32 bo_offset = 0;
1160         u32 table_offset, table_size;
1161
1162         if (adev->asic_type == CHIP_CARRIZO)
1163                 max_me = 5;
1164
1165         /* write the cp table buffer */
1166         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1167         for (me = 0; me < max_me; me++) {
1168                 if (me == 0) {
1169                         const struct gfx_firmware_header_v1_0 *hdr =
1170                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1171                         fw_data = (const __le32 *)
1172                                 (adev->gfx.ce_fw->data +
1173                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1174                         table_offset = le32_to_cpu(hdr->jt_offset);
1175                         table_size = le32_to_cpu(hdr->jt_size);
1176                 } else if (me == 1) {
1177                         const struct gfx_firmware_header_v1_0 *hdr =
1178                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1179                         fw_data = (const __le32 *)
1180                                 (adev->gfx.pfp_fw->data +
1181                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1182                         table_offset = le32_to_cpu(hdr->jt_offset);
1183                         table_size = le32_to_cpu(hdr->jt_size);
1184                 } else if (me == 2) {
1185                         const struct gfx_firmware_header_v1_0 *hdr =
1186                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1187                         fw_data = (const __le32 *)
1188                                 (adev->gfx.me_fw->data +
1189                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1190                         table_offset = le32_to_cpu(hdr->jt_offset);
1191                         table_size = le32_to_cpu(hdr->jt_size);
1192                 } else if (me == 3) {
1193                         const struct gfx_firmware_header_v1_0 *hdr =
1194                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1195                         fw_data = (const __le32 *)
1196                                 (adev->gfx.mec_fw->data +
1197                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1198                         table_offset = le32_to_cpu(hdr->jt_offset);
1199                         table_size = le32_to_cpu(hdr->jt_size);
1200                 } else  if (me == 4) {
1201                         const struct gfx_firmware_header_v1_0 *hdr =
1202                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1203                         fw_data = (const __le32 *)
1204                                 (adev->gfx.mec2_fw->data +
1205                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1206                         table_offset = le32_to_cpu(hdr->jt_offset);
1207                         table_size = le32_to_cpu(hdr->jt_size);
1208                 }
1209
1210                 for (i = 0; i < table_size; i ++) {
1211                         dst_ptr[bo_offset + i] =
1212                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1213                 }
1214
1215                 bo_offset += table_size;
1216         }
1217 }
1218
1219 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1220 {
1221         int r;
1222
1223         /* clear state block */
1224         if (adev->gfx.rlc.clear_state_obj) {
1225                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1226                 if (unlikely(r != 0))
1227                         dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1228                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1229                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1230                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1231                 adev->gfx.rlc.clear_state_obj = NULL;
1232         }
1233
1234         /* jump table block */
1235         if (adev->gfx.rlc.cp_table_obj) {
1236                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1237                 if (unlikely(r != 0))
1238                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1239                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1240                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1241                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1242                 adev->gfx.rlc.cp_table_obj = NULL;
1243         }
1244 }
1245
1246 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1247 {
1248         volatile u32 *dst_ptr;
1249         u32 dws;
1250         const struct cs_section_def *cs_data;
1251         int r;
1252
1253         adev->gfx.rlc.cs_data = vi_cs_data;
1254
1255         cs_data = adev->gfx.rlc.cs_data;
1256
1257         if (cs_data) {
1258                 /* clear state block */
1259                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1260
1261                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1262                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1263                                              AMDGPU_GEM_DOMAIN_VRAM,
1264                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1265                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1266                                              NULL, NULL,
1267                                              &adev->gfx.rlc.clear_state_obj);
1268                         if (r) {
1269                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1270                                 gfx_v8_0_rlc_fini(adev);
1271                                 return r;
1272                         }
1273                 }
1274                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1275                 if (unlikely(r != 0)) {
1276                         gfx_v8_0_rlc_fini(adev);
1277                         return r;
1278                 }
1279                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1280                                   &adev->gfx.rlc.clear_state_gpu_addr);
1281                 if (r) {
1282                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1283                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1284                         gfx_v8_0_rlc_fini(adev);
1285                         return r;
1286                 }
1287
1288                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1289                 if (r) {
1290                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1291                         gfx_v8_0_rlc_fini(adev);
1292                         return r;
1293                 }
1294                 /* set up the cs buffer */
1295                 dst_ptr = adev->gfx.rlc.cs_ptr;
1296                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1297                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1298                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1299         }
1300
1301         if ((adev->asic_type == CHIP_CARRIZO) ||
1302             (adev->asic_type == CHIP_STONEY)) {
1303                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1304                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1305                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1306                                              AMDGPU_GEM_DOMAIN_VRAM,
1307                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1308                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1309                                              NULL, NULL,
1310                                              &adev->gfx.rlc.cp_table_obj);
1311                         if (r) {
1312                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1313                                 return r;
1314                         }
1315                 }
1316
1317                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1318                 if (unlikely(r != 0)) {
1319                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1320                         return r;
1321                 }
1322                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1323                                   &adev->gfx.rlc.cp_table_gpu_addr);
1324                 if (r) {
1325                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1326                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1327                         return r;
1328                 }
1329                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1330                 if (r) {
1331                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1332                         return r;
1333                 }
1334
1335                 cz_init_cp_jump_table(adev);
1336
1337                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1338                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1339         }
1340
1341         return 0;
1342 }
1343
1344 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1345 {
1346         int r;
1347
1348         if (adev->gfx.mec.hpd_eop_obj) {
1349                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1350                 if (unlikely(r != 0))
1351                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1352                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1353                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1354                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1355                 adev->gfx.mec.hpd_eop_obj = NULL;
1356         }
1357 }
1358
/* bytes of HPD EOP buffer reserved per compute queue (num_queue * MEC_HPD_SIZE allocated below) */
#define MEC_HPD_SIZE 2048
1360
1361 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1362 {
1363         int r;
1364         u32 *hpd;
1365
1366         /*
1367          * we assign only 1 pipe because all other pipes will
1368          * be handled by KFD
1369          */
1370         adev->gfx.mec.num_mec = 1;
1371         adev->gfx.mec.num_pipe = 1;
1372         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1373
1374         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1375                 r = amdgpu_bo_create(adev,
1376                                      adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1377                                      PAGE_SIZE, true,
1378                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1379                                      &adev->gfx.mec.hpd_eop_obj);
1380                 if (r) {
1381                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1382                         return r;
1383                 }
1384         }
1385
1386         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1387         if (unlikely(r != 0)) {
1388                 gfx_v8_0_mec_fini(adev);
1389                 return r;
1390         }
1391         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1392                           &adev->gfx.mec.hpd_eop_gpu_addr);
1393         if (r) {
1394                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1395                 gfx_v8_0_mec_fini(adev);
1396                 return r;
1397         }
1398         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1399         if (r) {
1400                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1401                 gfx_v8_0_mec_fini(adev);
1402                 return r;
1403         }
1404
1405         memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1406
1407         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1408         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1409
1410         return 0;
1411 }
1412
/*
 * Hand-assembled GCN compute shader dispatched by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize the VGPRs before EDC
 * (error detect/correct) is enabled.  The 0x7e..02xx words are presumably
 * v_mov_b32 instructions and the trailing pair a wait + s_endpgm —
 * TODO confirm against the GCN3 ISA manual.
 */
static const u32 vgpr_init_compute_shader[] =
{
        0x7e000209, 0x7e020208,
        0x7e040207, 0x7e060206,
        0x7e080205, 0x7e0a0204,
        0x7e0c0203, 0x7e0e0202,
        0x7e100201, 0x7e120200,
        0x7e140209, 0x7e160208,
        0x7e180207, 0x7e1a0206,
        0x7e1c0205, 0x7e1e0204,
        0x7e200203, 0x7e220202,
        0x7e240201, 0x7e260200,
        0x7e280209, 0x7e2a0208,
        0x7e2c0207, 0x7e2e0206,
        0x7e300205, 0x7e320204,
        0x7e340203, 0x7e360202,
        0x7e380201, 0x7e3a0200,
        0x7e3c0209, 0x7e3e0208,
        0x7e400207, 0x7e420206,
        0x7e440205, 0x7e460204,
        0x7e480203, 0x7e4a0202,
        0x7e4c0201, 0x7e4e0200,
        0x7e500209, 0x7e520208,
        0x7e540207, 0x7e560206,
        0x7e580205, 0x7e5a0204,
        0x7e5c0203, 0x7e5e0202,
        0x7e600201, 0x7e620200,
        0x7e640209, 0x7e660208,
        0x7e680207, 0x7e6a0206,
        0x7e6c0205, 0x7e6e0204,
        0x7e700203, 0x7e720202,
        0x7e740201, 0x7e760200,
        0x7e780209, 0x7e7a0208,
        0x7e7c0207, 0x7e7e0206,
        0xbf8a0000, 0xbf810000,
};
1449
/*
 * Hand-assembled GCN compute shader dispatched (twice, with different
 * COMPUTE_STATIC_THREAD_MGMT_SE0 masks) by gfx_v8_0_do_edc_gpr_workarounds()
 * to initialize the SGPRs before EDC is enabled.  The 0xbe.. words are
 * presumably s_mov_b32 instructions — TODO confirm against the GCN3 ISA
 * manual.
 */
static const u32 sgpr_init_compute_shader[] =
{
        0xbe8a0100, 0xbe8c0102,
        0xbe8e0104, 0xbe900106,
        0xbe920108, 0xbe940100,
        0xbe960102, 0xbe980104,
        0xbe9a0106, 0xbe9c0108,
        0xbe9e0100, 0xbea00102,
        0xbea20104, 0xbea40106,
        0xbea60108, 0xbea80100,
        0xbeaa0102, 0xbeac0104,
        0xbeae0106, 0xbeb00108,
        0xbeb20100, 0xbeb40102,
        0xbeb60104, 0xbeb80106,
        0xbeba0108, 0xbebc0100,
        0xbebe0102, 0xbec00104,
        0xbec20106, 0xbec40108,
        0xbec60100, 0xbec80102,
        0xbee60004, 0xbee70005,
        0xbeea0006, 0xbeeb0007,
        0xbee80008, 0xbee90009,
        0xbefc0000, 0xbf8a0000,
        0xbf810000, 0x00000000,
};
1474
/*
 * (register, value) pairs programmed via PACKET3_SET_SH_REG before the
 * VGPR-init dispatch in gfx_v8_0_do_edc_gpr_workarounds().  The
 * COMPUTE_USER_DATA_n values are arbitrary recognizable markers.
 */
static const u32 vgpr_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
        mmCOMPUTE_RESOURCE_LIMITS, 0,
        mmCOMPUTE_NUM_THREAD_X, 256*4,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1494
/*
 * (register, value) pairs for the first SGPR-init dispatch; the SE0
 * thread-mgmt mask 0x0f selects a different CU set than sgpr2_init_regs.
 */
static const u32 sgpr1_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1514
/*
 * (register, value) pairs for the second SGPR-init dispatch; identical to
 * sgpr1_init_regs except the SE0 thread-mgmt mask (0xf0) covers the
 * complementary CU set.
 */
static const u32 sgpr2_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1534
/*
 * SEC/DED (single-error-correct / double-error-detect) counter registers.
 * gfx_v8_0_do_edc_gpr_workarounds() reads each one back after enabling
 * EDC in order to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
        mmCPC_EDC_ATC_CNT,
        mmCPC_EDC_SCRATCH_CNT,
        mmCPC_EDC_UCODE_CNT,
        mmCPF_EDC_ATC_CNT,
        mmCPF_EDC_ROQ_CNT,
        mmCPF_EDC_TAG_CNT,
        mmCPG_EDC_ATC_CNT,
        mmCPG_EDC_DMA_CNT,
        mmCPG_EDC_TAG_CNT,
        mmDC_EDC_CSINVOC_CNT,
        mmDC_EDC_RESTORE_CNT,
        mmDC_EDC_STATE_CNT,
        mmGDS_EDC_CNT,
        mmGDS_EDC_GRBM_CNT,
        mmGDS_EDC_OA_DED,
        mmSPI_EDC_CNT,
        mmSQC_ATC_EDC_GATCL1_CNT,
        mmSQC_EDC_CNT,
        mmSQ_EDC_DED_CNT,
        mmSQ_EDC_INFO,
        mmSQ_EDC_SEC_CNT,
        mmTCC_EDC_CNT,
        mmTCP_ATC_EDC_GATCL1_CNT,
        mmTCP_EDC_CNT,
        mmTD_EDC_CNT
};
1563
/**
 * gfx_v8_0_do_edc_gpr_workarounds - prime GPR state before enabling EDC
 * @adev: amdgpu device pointer
 *
 * Carrizo only.  Builds a single indirect buffer containing three compute
 * dispatches (one VGPR-init shader, the SGPR-init shader run twice with
 * different CU masks), submits it on compute ring 0 and waits for
 * completion.  It then enables DED/FED reporting in GB_EDC_MODE and
 * CC_GC_EDC_CONFIG and reads back every SEC/DED counter register to clear
 * it.  Returns 0 on success (or when skipped because the ASIC/ring does
 * not qualify), otherwise the error from IB submission or the fence wait.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
        int r, i;
        u32 tmp;
        unsigned total_size, vgpr_offset, sgpr_offset;
        u64 gpu_addr;

        /* only supported on CZ */
        if (adev->asic_type != CHIP_CARRIZO)
                return 0;

        /* bail if the compute ring is not ready */
        if (!ring->ready)
                return 0;

        /* disable EDC while the init shaders run; tmp keeps the old mode */
        tmp = RREG32(mmGB_EDC_MODE);
        WREG32(mmGB_EDC_MODE, 0);

        /*
         * IB size per dispatch: 3 dwords per SET_SH_REG reg/value pair,
         * + 4 for the PGM_LO/HI packet + 5 for DISPATCH_DIRECT
         * + 2 for the CS partial flush EVENT_WRITE.
         */
        total_size =
                (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size +=
                (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size +=
                (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
        total_size = ALIGN(total_size, 256);
        vgpr_offset = total_size;
        total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
        sgpr_offset = total_size;
        total_size += sizeof(sgpr_init_compute_shader);

        /* allocate an indirect buffer to put the commands in */
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, total_size, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
                return r;
        }

        /* load the compute shaders */
        for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
                ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

        for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
                ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

        /* init the ib length to 0 */
        ib.length_dw = 0;

        /* VGPR */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* SGPR1 */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* SGPR2 — same shader (sgpr_offset) as SGPR1, different CU mask */
        /* write the register state for the compute dispatch */
        for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
                ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
                ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
        }
        /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
        gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
        ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
        ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
        ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

        /* write dispatch packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
        ib.ptr[ib.length_dw++] = 8; /* x */
        ib.ptr[ib.length_dw++] = 1; /* y */
        ib.ptr[ib.length_dw++] = 1; /* z */
        ib.ptr[ib.length_dw++] =
                REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

        /* write CS partial flush packet */
        ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
        ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

        /* schedule the ib on the ring */
        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
        if (r) {
                DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
                goto fail;
        }

        /* wait for the GPU to finish processing the IB */
        r = dma_fence_wait(f, false);
        if (r) {
                DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
                goto fail;
        }

        /* re-enable EDC with DED halt and FED propagation turned on */
        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
        tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
        WREG32(mmGB_EDC_MODE, tmp);

        tmp = RREG32(mmCC_GC_EDC_CONFIG);
        tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
        WREG32(mmCC_GC_EDC_CONFIG, tmp);


        /* read back registers to clear the counters */
        for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
                RREG32(sec_ded_counter_registers[i]);

fail:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);

        return r;
}
1726
/**
 * gfx_v8_0_gpu_early_init - set per-ASIC gfx configuration
 * @adev: amdgpu device pointer
 *
 * Fills adev->gfx.config with the shader-engine / pipe / CU topology and
 * FIFO sizes for the detected ASIC (Polaris parts query the values from
 * atombios instead), derives the memory row size from the MC registers
 * (DIMM fuse registers on APUs), and computes the final GB_ADDR_CONFIG
 * value.  Returns 0 on success, or the error from
 * amdgpu_atombios_get_gfx_info() on Polaris.
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
        u32 gb_addr_config;
        u32 mc_shared_chmap, mc_arb_ramcfg;
        u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
        u32 tmp;
        int ret;

        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                adev->gfx.config.max_shader_engines = 1;
                adev->gfx.config.max_tile_pipes = 2;
                adev->gfx.config.max_cu_per_sh = 6;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_texture_channel_caches = 2;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_FIJI:
                adev->gfx.config.max_shader_engines = 4;
                adev->gfx.config.max_tile_pipes = 16;
                adev->gfx.config.max_cu_per_sh = 16;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 4;
                adev->gfx.config.max_texture_channel_caches = 16;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_POLARIS11:
                /* topology comes from the vbios on Polaris */
                ret = amdgpu_atombios_get_gfx_info(adev);
                if (ret)
                        return ret;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_POLARIS10:
                /* topology comes from the vbios on Polaris */
                ret = amdgpu_atombios_get_gfx_info(adev);
                if (ret)
                        return ret;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_TONGA:
                adev->gfx.config.max_shader_engines = 4;
                adev->gfx.config.max_tile_pipes = 8;
                adev->gfx.config.max_cu_per_sh = 8;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_texture_channel_caches = 8;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_CARRIZO:
                adev->gfx.config.max_shader_engines = 1;
                adev->gfx.config.max_tile_pipes = 2;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;

                /* CU count depends on the Carrizo SKU (PCI revision id) */
                switch (adev->pdev->revision) {
                case 0xc4:
                case 0x84:
                case 0xc8:
                case 0xcc:
                case 0xe1:
                case 0xe3:
                        /* B10 */
                        adev->gfx.config.max_cu_per_sh = 8;
                        break;
                case 0xc5:
                case 0x81:
                case 0x85:
                case 0xc9:
                case 0xcd:
                case 0xe2:
                case 0xe4:
                        /* B8 */
                        adev->gfx.config.max_cu_per_sh = 6;
                        break;
                case 0xc6:
                case 0xca:
                case 0xce:
                case 0x88:
                        /* B6 */
                        adev->gfx.config.max_cu_per_sh = 6;
                        break;
                case 0xc7:
                case 0x87:
                case 0xcb:
                case 0xe5:
                case 0x89:
                default:
                        /* B4 */
                        adev->gfx.config.max_cu_per_sh = 4;
                        break;
                }

                adev->gfx.config.max_texture_channel_caches = 2;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_STONEY:
                adev->gfx.config.max_shader_engines = 1;
                adev->gfx.config.max_tile_pipes = 2;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 1;

                /* CU count depends on the Stoney SKU (PCI revision id) */
                switch (adev->pdev->revision) {
                case 0xc0:
                case 0xc1:
                case 0xc2:
                case 0xc4:
                case 0xc8:
                case 0xc9:
                        adev->gfx.config.max_cu_per_sh = 3;
                        break;
                case 0xd0:
                case 0xd1:
                case 0xd2:
                default:
                        adev->gfx.config.max_cu_per_sh = 2;
                        break;
                }

                adev->gfx.config.max_texture_channel_caches = 2;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 16;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
                break;
        default:
                adev->gfx.config.max_shader_engines = 2;
                adev->gfx.config.max_tile_pipes = 4;
                adev->gfx.config.max_cu_per_sh = 2;
                adev->gfx.config.max_sh_per_se = 1;
                adev->gfx.config.max_backends_per_se = 2;
                adev->gfx.config.max_texture_channel_caches = 4;
                adev->gfx.config.max_gprs = 256;
                adev->gfx.config.max_gs_threads = 32;
                adev->gfx.config.max_hw_contexts = 8;

                adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
                adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
                adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
                adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
                break;
        }

        /* NOTE(review): mc_shared_chmap is read but never used below */
        mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
        adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
        mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

        adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
        adev->gfx.config.mem_max_burst_length_bytes = 256;
        if (adev->flags & AMD_IS_APU) {
                /* Get memory bank mapping mode. */
                tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
                dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
                dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

                tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
                dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
                dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

                /* Validate settings in case only one DIMM installed. */
                if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
                        dimm00_addr_map = 0;
                if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
                        dimm01_addr_map = 0;
                if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
                        dimm10_addr_map = 0;
                if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
                        dimm11_addr_map = 0;

                /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
                /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
                if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
                        adev->gfx.config.mem_row_size_in_kb = 2;
                else
                        adev->gfx.config.mem_row_size_in_kb = 1;
        } else {
                /* discrete parts derive the row size from MC_ARB_RAMCFG */
                tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
                adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
                if (adev->gfx.config.mem_row_size_in_kb > 4)
                        adev->gfx.config.mem_row_size_in_kb = 4;
        }

        adev->gfx.config.shader_engine_tile_size = 32;
        adev->gfx.config.num_gpus = 1;
        adev->gfx.config.multi_gpu_tile_size = 64;

        /* fix up row size */
        switch (adev->gfx.config.mem_row_size_in_kb) {
        case 1:
        default:
                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
                break;
        case 2:
                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
                break;
        case 4:
                gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
                break;
        }
        adev->gfx.config.gb_addr_config = gb_addr_config;

        return 0;
}
1983
1984 static int gfx_v8_0_sw_init(void *handle)
1985 {
1986         int i, r;
1987         struct amdgpu_ring *ring;
1988         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1989
1990         /* EOP Event */
1991         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1992         if (r)
1993                 return r;
1994
1995         /* Privileged reg */
1996         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1997         if (r)
1998                 return r;
1999
2000         /* Privileged inst */
2001         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2002         if (r)
2003                 return r;
2004
2005         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2006
2007         gfx_v8_0_scratch_init(adev);
2008
2009         r = gfx_v8_0_init_microcode(adev);
2010         if (r) {
2011                 DRM_ERROR("Failed to load gfx firmware!\n");
2012                 return r;
2013         }
2014
2015         r = gfx_v8_0_rlc_init(adev);
2016         if (r) {
2017                 DRM_ERROR("Failed to init rlc BOs!\n");
2018                 return r;
2019         }
2020
2021         r = gfx_v8_0_mec_init(adev);
2022         if (r) {
2023                 DRM_ERROR("Failed to init MEC BOs!\n");
2024                 return r;
2025         }
2026
2027         /* set up the gfx ring */
2028         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2029                 ring = &adev->gfx.gfx_ring[i];
2030                 ring->ring_obj = NULL;
2031                 sprintf(ring->name, "gfx");
2032                 /* no gfx doorbells on iceland */
2033                 if (adev->asic_type != CHIP_TOPAZ) {
2034                         ring->use_doorbell = true;
2035                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2036                 }
2037
2038                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2039                                      AMDGPU_CP_IRQ_GFX_EOP);
2040                 if (r)
2041                         return r;
2042         }
2043
2044         /* set up the compute queues */
2045         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2046                 unsigned irq_type;
2047
2048                 /* max 32 queues per MEC */
2049                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2050                         DRM_ERROR("Too many (%d) compute rings!\n", i);
2051                         break;
2052                 }
2053                 ring = &adev->gfx.compute_ring[i];
2054                 ring->ring_obj = NULL;
2055                 ring->use_doorbell = true;
2056                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2057                 ring->me = 1; /* first MEC */
2058                 ring->pipe = i / 8;
2059                 ring->queue = i % 8;
2060                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2061                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2062                 /* type-2 packets are deprecated on MEC, use type-3 instead */
2063                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2064                                      irq_type);
2065                 if (r)
2066                         return r;
2067         }
2068
2069         /* reserve GDS, GWS and OA resource for gfx */
2070         r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2071                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2072                                     &adev->gds.gds_gfx_bo, NULL, NULL);
2073         if (r)
2074                 return r;
2075
2076         r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2077                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2078                                     &adev->gds.gws_gfx_bo, NULL, NULL);
2079         if (r)
2080                 return r;
2081
2082         r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2083                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2084                                     &adev->gds.oa_gfx_bo, NULL, NULL);
2085         if (r)
2086                 return r;
2087
2088         adev->gfx.ce_ram_size = 0x8000;
2089
2090         r = gfx_v8_0_gpu_early_init(adev);
2091         if (r)
2092                 return r;
2093
2094         return 0;
2095 }
2096
2097 static int gfx_v8_0_sw_fini(void *handle)
2098 {
2099         int i;
2100         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2101
2102         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2103         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2104         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2105
2106         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2107                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2108         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2109                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2110
2111         gfx_v8_0_mec_fini(adev);
2112         gfx_v8_0_rlc_fini(adev);
2113         gfx_v8_0_free_microcode(adev);
2114
2115         return 0;
2116 }
2117
2118 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2119 {
2120         uint32_t *modearray, *mod2array;
2121         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2122         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2123         u32 reg_offset;
2124
2125         modearray = adev->gfx.config.tile_mode_array;
2126         mod2array = adev->gfx.config.macrotile_mode_array;
2127
2128         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2129                 modearray[reg_offset] = 0;
2130
2131         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2132                 mod2array[reg_offset] = 0;
2133
2134         switch (adev->asic_type) {
2135         case CHIP_TOPAZ:
2136                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2137                                 PIPE_CONFIG(ADDR_SURF_P2) |
2138                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2139                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2140                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2141                                 PIPE_CONFIG(ADDR_SURF_P2) |
2142                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2143                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2144                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2145                                 PIPE_CONFIG(ADDR_SURF_P2) |
2146                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2147                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2148                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2149                                 PIPE_CONFIG(ADDR_SURF_P2) |
2150                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2151                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2152                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153                                 PIPE_CONFIG(ADDR_SURF_P2) |
2154                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2155                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2156                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2157                                 PIPE_CONFIG(ADDR_SURF_P2) |
2158                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2159                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2160                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2161                                 PIPE_CONFIG(ADDR_SURF_P2) |
2162                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2163                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2164                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2165                                 PIPE_CONFIG(ADDR_SURF_P2));
2166                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2167                                 PIPE_CONFIG(ADDR_SURF_P2) |
2168                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2169                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2170                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2171                                  PIPE_CONFIG(ADDR_SURF_P2) |
2172                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2173                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2174                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2175                                  PIPE_CONFIG(ADDR_SURF_P2) |
2176                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2177                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2178                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2179                                  PIPE_CONFIG(ADDR_SURF_P2) |
2180                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2181                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2182                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2183                                  PIPE_CONFIG(ADDR_SURF_P2) |
2184                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2185                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2187                                  PIPE_CONFIG(ADDR_SURF_P2) |
2188                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2189                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2191                                  PIPE_CONFIG(ADDR_SURF_P2) |
2192                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2193                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2194                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2195                                  PIPE_CONFIG(ADDR_SURF_P2) |
2196                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2197                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2198                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2199                                  PIPE_CONFIG(ADDR_SURF_P2) |
2200                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2201                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2202                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2203                                  PIPE_CONFIG(ADDR_SURF_P2) |
2204                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2205                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2206                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2207                                  PIPE_CONFIG(ADDR_SURF_P2) |
2208                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2209                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2210                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2211                                  PIPE_CONFIG(ADDR_SURF_P2) |
2212                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2213                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2214                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2215                                  PIPE_CONFIG(ADDR_SURF_P2) |
2216                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2217                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2218                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2219                                  PIPE_CONFIG(ADDR_SURF_P2) |
2220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2222                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2223                                  PIPE_CONFIG(ADDR_SURF_P2) |
2224                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2225                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2226                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2227                                  PIPE_CONFIG(ADDR_SURF_P2) |
2228                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2229                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2230                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231                                  PIPE_CONFIG(ADDR_SURF_P2) |
2232                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2233                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2234                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2235                                  PIPE_CONFIG(ADDR_SURF_P2) |
2236                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2237                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2238
2239                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2240                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2241                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2242                                 NUM_BANKS(ADDR_SURF_8_BANK));
2243                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2244                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2245                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2246                                 NUM_BANKS(ADDR_SURF_8_BANK));
2247                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2248                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2249                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2250                                 NUM_BANKS(ADDR_SURF_8_BANK));
2251                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2252                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2253                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2254                                 NUM_BANKS(ADDR_SURF_8_BANK));
2255                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2257                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258                                 NUM_BANKS(ADDR_SURF_8_BANK));
2259                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2260                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2261                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2262                                 NUM_BANKS(ADDR_SURF_8_BANK));
2263                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2264                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2265                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2266                                 NUM_BANKS(ADDR_SURF_8_BANK));
2267                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2268                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2269                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2270                                 NUM_BANKS(ADDR_SURF_16_BANK));
2271                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2272                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2273                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2274                                 NUM_BANKS(ADDR_SURF_16_BANK));
2275                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2276                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2277                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2278                                  NUM_BANKS(ADDR_SURF_16_BANK));
2279                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2280                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2281                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2282                                  NUM_BANKS(ADDR_SURF_16_BANK));
2283                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2284                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2285                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2286                                  NUM_BANKS(ADDR_SURF_16_BANK));
2287                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2288                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2289                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290                                  NUM_BANKS(ADDR_SURF_16_BANK));
2291                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2293                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2294                                  NUM_BANKS(ADDR_SURF_8_BANK));
2295
2296                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2297                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2298                             reg_offset != 23)
2299                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2300
2301                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2302                         if (reg_offset != 7)
2303                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2304
2305                 break;
2306         case CHIP_FIJI:
2307                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2308                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2309                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2310                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2311                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2312                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2313                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2314                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2315                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2318                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2319                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2322                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2323                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2326                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2328                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2330                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2332                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2334                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2336                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2337                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2338                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2340                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2341                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2342                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2344                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2345                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2346                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2348                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2349                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2350                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2352                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2353                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2354                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2355                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2356                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2357                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2358                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2361                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2365                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2366                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2369                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2370                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2373                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2374                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2377                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2378                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2381                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2382                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2384                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2385                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2386                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2388                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2389                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2390                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2392                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2393                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2394                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2396                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2398                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2399                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2402                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2404                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2406                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2410                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2412                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2416                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2420                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2424                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2425                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2427                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2428                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2429
2430                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2431                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2432                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2433                                 NUM_BANKS(ADDR_SURF_8_BANK));
2434                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2436                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2437                                 NUM_BANKS(ADDR_SURF_8_BANK));
2438                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2441                                 NUM_BANKS(ADDR_SURF_8_BANK));
2442                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2444                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445                                 NUM_BANKS(ADDR_SURF_8_BANK));
2446                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2448                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2449                                 NUM_BANKS(ADDR_SURF_8_BANK));
2450                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2452                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2453                                 NUM_BANKS(ADDR_SURF_8_BANK));
2454                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2456                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2457                                 NUM_BANKS(ADDR_SURF_8_BANK));
2458                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2460                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2461                                 NUM_BANKS(ADDR_SURF_8_BANK));
2462                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2464                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2465                                 NUM_BANKS(ADDR_SURF_8_BANK));
2466                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2468                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469                                  NUM_BANKS(ADDR_SURF_8_BANK));
2470                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2472                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2473                                  NUM_BANKS(ADDR_SURF_8_BANK));
2474                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2476                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2477                                  NUM_BANKS(ADDR_SURF_8_BANK));
2478                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2481                                  NUM_BANKS(ADDR_SURF_8_BANK));
2482                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2484                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2485                                  NUM_BANKS(ADDR_SURF_4_BANK));
2486
2487                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2488                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2489
2490                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2491                         if (reg_offset != 7)
2492                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2493
2494                 break;
2495         case CHIP_TONGA:
2496                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2497                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2498                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2499                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2500                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2501                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2502                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2503                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2504                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2506                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2507                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2508                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2510                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2511                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2512                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2515                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2516                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2517                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2519                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2520                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2521                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2523                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2524                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2525                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2526                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2527                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2528                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2529                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2530                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2531                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2533                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2534                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2536                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2537                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2538                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2539                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2541                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2542                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2544                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2546                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2549                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2550                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2553                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2554                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2555                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2557                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2558                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2559                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2562                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2563                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2564                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2566                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2567                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2570                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2571                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2573                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2574                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2575                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2577                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2578                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2579                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2581                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2582                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2583                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2585                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2586                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2587                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2588                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2589                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2590                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2591                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2593                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2594                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2595                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2597                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2598                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2599                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2601                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2602                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2603                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2605                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2607                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2609                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2610                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2611                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2613                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2614                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2615                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2616                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2617                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2618
2619                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2621                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2622                                 NUM_BANKS(ADDR_SURF_16_BANK));
2623                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2625                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2626                                 NUM_BANKS(ADDR_SURF_16_BANK));
2627                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2629                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2630                                 NUM_BANKS(ADDR_SURF_16_BANK));
2631                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2633                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2634                                 NUM_BANKS(ADDR_SURF_16_BANK));
2635                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2637                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2638                                 NUM_BANKS(ADDR_SURF_16_BANK));
2639                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2641                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2642                                 NUM_BANKS(ADDR_SURF_16_BANK));
2643                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2646                                 NUM_BANKS(ADDR_SURF_16_BANK));
2647                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2649                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2650                                 NUM_BANKS(ADDR_SURF_16_BANK));
2651                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2653                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2654                                 NUM_BANKS(ADDR_SURF_16_BANK));
2655                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2657                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2658                                  NUM_BANKS(ADDR_SURF_16_BANK));
2659                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2662                                  NUM_BANKS(ADDR_SURF_16_BANK));
2663                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2665                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2666                                  NUM_BANKS(ADDR_SURF_8_BANK));
2667                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2669                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2670                                  NUM_BANKS(ADDR_SURF_4_BANK));
2671                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2673                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2674                                  NUM_BANKS(ADDR_SURF_4_BANK));
2675
2676                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2677                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2678
2679                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2680                         if (reg_offset != 7)
2681                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2682
2683                 break;
2684         case CHIP_POLARIS11:
2685                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2688                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2692                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2696                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2697                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2700                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2701                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2704                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2705                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2708                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2709                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2710                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2712                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2713                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2714                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2716                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2717                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2718                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2719                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2722                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2726                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2730                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2731                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2732                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2734                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2735                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2736                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2739                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2744                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2746                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2747                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2748                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2750                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2751                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2754                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2755                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2756                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2760                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2764                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2766                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2768                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2770                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2771                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2772                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2774                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2776                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2778                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2779                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2780                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2782                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2783                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2784                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2786                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2787                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2788                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2790                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2791                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2792                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2794                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2798                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2799                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2800                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2802                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2803                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2806                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2807
2808                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2810                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811                                 NUM_BANKS(ADDR_SURF_16_BANK));
2812
2813                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2815                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2816                                 NUM_BANKS(ADDR_SURF_16_BANK));
2817
2818                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2819                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2820                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2821                                 NUM_BANKS(ADDR_SURF_16_BANK));
2822
2823                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2824                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2825                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2826                                 NUM_BANKS(ADDR_SURF_16_BANK));
2827
2828                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2831                                 NUM_BANKS(ADDR_SURF_16_BANK));
2832
2833                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836                                 NUM_BANKS(ADDR_SURF_16_BANK));
2837
2838                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2840                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2841                                 NUM_BANKS(ADDR_SURF_16_BANK));
2842
2843                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2844                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2845                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846                                 NUM_BANKS(ADDR_SURF_16_BANK));
2847
2848                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2849                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2850                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2851                                 NUM_BANKS(ADDR_SURF_16_BANK));
2852
2853                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856                                 NUM_BANKS(ADDR_SURF_16_BANK));
2857
2858                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2860                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2861                                 NUM_BANKS(ADDR_SURF_16_BANK));
2862
2863                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2864                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2865                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2866                                 NUM_BANKS(ADDR_SURF_16_BANK));
2867
2868                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2870                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2871                                 NUM_BANKS(ADDR_SURF_8_BANK));
2872
2873                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2874                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2875                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2876                                 NUM_BANKS(ADDR_SURF_4_BANK));
2877
2878                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2879                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2880
2881                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2882                         if (reg_offset != 7)
2883                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2884
2885                 break;
2886         case CHIP_POLARIS10:
2887                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2890                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2900                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2906                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2907                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2908                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2912                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2916                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2917                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2920                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2921                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2924                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2926                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2928                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2932                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2933                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2935                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2936                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2937                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2938                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2940                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2941                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2944                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2946                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2948                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2952                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2956                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2957                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2958                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2962                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2966                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2968                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2970                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2974                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2976                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2978                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2979                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2980                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2982                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2984                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2986                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2990                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2992                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2994                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2996                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3000                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3001                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3004                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3005                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3006                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3007                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3009
3010                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3011                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3012                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3013                                 NUM_BANKS(ADDR_SURF_16_BANK));
3014
3015                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3017                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018                                 NUM_BANKS(ADDR_SURF_16_BANK));
3019
3020                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3021                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3022                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3023                                 NUM_BANKS(ADDR_SURF_16_BANK));
3024
3025                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3027                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3028                                 NUM_BANKS(ADDR_SURF_16_BANK));
3029
3030                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3032                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3033                                 NUM_BANKS(ADDR_SURF_16_BANK));
3034
3035                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3037                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3038                                 NUM_BANKS(ADDR_SURF_16_BANK));
3039
3040                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3042                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3043                                 NUM_BANKS(ADDR_SURF_16_BANK));
3044
3045                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3047                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3048                                 NUM_BANKS(ADDR_SURF_16_BANK));
3049
3050                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3052                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3053                                 NUM_BANKS(ADDR_SURF_16_BANK));
3054
3055                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3057                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3058                                 NUM_BANKS(ADDR_SURF_16_BANK));
3059
3060                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3063                                 NUM_BANKS(ADDR_SURF_16_BANK));
3064
3065                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3067                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3068                                 NUM_BANKS(ADDR_SURF_8_BANK));
3069
3070                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3071                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3072                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3073                                 NUM_BANKS(ADDR_SURF_4_BANK));
3074
3075                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3077                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3078                                 NUM_BANKS(ADDR_SURF_4_BANK));
3079
3080                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3081                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3082
3083                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3084                         if (reg_offset != 7)
3085                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3086
3087                 break;
3088         case CHIP_STONEY:
3089                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090                                 PIPE_CONFIG(ADDR_SURF_P2) |
3091                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3092                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3094                                 PIPE_CONFIG(ADDR_SURF_P2) |
3095                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3096                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3097                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3098                                 PIPE_CONFIG(ADDR_SURF_P2) |
3099                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3100                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3102                                 PIPE_CONFIG(ADDR_SURF_P2) |
3103                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3104                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3105                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106                                 PIPE_CONFIG(ADDR_SURF_P2) |
3107                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3108                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3109                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3110                                 PIPE_CONFIG(ADDR_SURF_P2) |
3111                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3112                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3114                                 PIPE_CONFIG(ADDR_SURF_P2) |
3115                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3116                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3118                                 PIPE_CONFIG(ADDR_SURF_P2));
3119                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3120                                 PIPE_CONFIG(ADDR_SURF_P2) |
3121                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3122                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3123                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3124                                  PIPE_CONFIG(ADDR_SURF_P2) |
3125                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3126                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3127                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128                                  PIPE_CONFIG(ADDR_SURF_P2) |
3129                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3130                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3131                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3132                                  PIPE_CONFIG(ADDR_SURF_P2) |
3133                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3134                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3135                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3136                                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3138                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3139                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3140                                  PIPE_CONFIG(ADDR_SURF_P2) |
3141                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3142                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3144                                  PIPE_CONFIG(ADDR_SURF_P2) |
3145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3147                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3148                                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3152                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3156                                  PIPE_CONFIG(ADDR_SURF_P2) |
3157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3160                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3164                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3167                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3168                                  PIPE_CONFIG(ADDR_SURF_P2) |
3169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3172                                  PIPE_CONFIG(ADDR_SURF_P2) |
3173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3176                                  PIPE_CONFIG(ADDR_SURF_P2) |
3177                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3178                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3180                                  PIPE_CONFIG(ADDR_SURF_P2) |
3181                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3182                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3183                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3184                                  PIPE_CONFIG(ADDR_SURF_P2) |
3185                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3186                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3187                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3188                                  PIPE_CONFIG(ADDR_SURF_P2) |
3189                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3190                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3191
3192                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3194                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3195                                 NUM_BANKS(ADDR_SURF_8_BANK));
3196                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3198                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3199                                 NUM_BANKS(ADDR_SURF_8_BANK));
3200                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3202                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3203                                 NUM_BANKS(ADDR_SURF_8_BANK));
3204                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3205                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3206                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3207                                 NUM_BANKS(ADDR_SURF_8_BANK));
3208                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3210                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3211                                 NUM_BANKS(ADDR_SURF_8_BANK));
3212                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3214                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3215                                 NUM_BANKS(ADDR_SURF_8_BANK));
3216                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3218                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3219                                 NUM_BANKS(ADDR_SURF_8_BANK));
3220                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3221                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3222                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3223                                 NUM_BANKS(ADDR_SURF_16_BANK));
3224                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3225                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3226                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3227                                 NUM_BANKS(ADDR_SURF_16_BANK));
3228                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3229                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3230                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3231                                  NUM_BANKS(ADDR_SURF_16_BANK));
3232                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3233                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3234                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235                                  NUM_BANKS(ADDR_SURF_16_BANK));
3236                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3237                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3238                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3239                                  NUM_BANKS(ADDR_SURF_16_BANK));
3240                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3241                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3242                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243                                  NUM_BANKS(ADDR_SURF_16_BANK));
3244                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3246                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3247                                  NUM_BANKS(ADDR_SURF_8_BANK));
3248
3249                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3250                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3251                             reg_offset != 23)
3252                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3253
3254                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3255                         if (reg_offset != 7)
3256                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3257
3258                 break;
	default:
		/* Unrecognized ASIC: warn loudly, then reuse the CHIP_CARRIZO
		 * tiling tables below as a conservative default.
		 */
		dev_warn(adev->dev,
			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
			 adev->asic_type);

		/* fall through - intentional, silences -Wimplicit-fallthrough */
3264         case CHIP_CARRIZO:
3265                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3266                                 PIPE_CONFIG(ADDR_SURF_P2) |
3267                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3268                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3269                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3270                                 PIPE_CONFIG(ADDR_SURF_P2) |
3271                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3272                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3273                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3274                                 PIPE_CONFIG(ADDR_SURF_P2) |
3275                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3276                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3277                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3278                                 PIPE_CONFIG(ADDR_SURF_P2) |
3279                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3280                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3281                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3282                                 PIPE_CONFIG(ADDR_SURF_P2) |
3283                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3284                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3285                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3286                                 PIPE_CONFIG(ADDR_SURF_P2) |
3287                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3288                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3289                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3290                                 PIPE_CONFIG(ADDR_SURF_P2) |
3291                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3292                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3293                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3294                                 PIPE_CONFIG(ADDR_SURF_P2));
3295                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3296                                 PIPE_CONFIG(ADDR_SURF_P2) |
3297                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3298                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3299                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3300                                  PIPE_CONFIG(ADDR_SURF_P2) |
3301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3303                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3304                                  PIPE_CONFIG(ADDR_SURF_P2) |
3305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3307                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3308                                  PIPE_CONFIG(ADDR_SURF_P2) |
3309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3311                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3312                                  PIPE_CONFIG(ADDR_SURF_P2) |
3313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3315                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3316                                  PIPE_CONFIG(ADDR_SURF_P2) |
3317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3319                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3320                                  PIPE_CONFIG(ADDR_SURF_P2) |
3321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3323                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3324                                  PIPE_CONFIG(ADDR_SURF_P2) |
3325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3327                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3328                                  PIPE_CONFIG(ADDR_SURF_P2) |
3329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3331                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3332                                  PIPE_CONFIG(ADDR_SURF_P2) |
3333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3335                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3336                                  PIPE_CONFIG(ADDR_SURF_P2) |
3337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3339                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3340                                  PIPE_CONFIG(ADDR_SURF_P2) |
3341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3343                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3344                                  PIPE_CONFIG(ADDR_SURF_P2) |
3345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3347                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3348                                  PIPE_CONFIG(ADDR_SURF_P2) |
3349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3351                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3352                                  PIPE_CONFIG(ADDR_SURF_P2) |
3353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3355                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3356                                  PIPE_CONFIG(ADDR_SURF_P2) |
3357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3359                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3360                                  PIPE_CONFIG(ADDR_SURF_P2) |
3361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3363                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3364                                  PIPE_CONFIG(ADDR_SURF_P2) |
3365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3367
3368                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3370                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3371                                 NUM_BANKS(ADDR_SURF_8_BANK));
3372                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3373                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3374                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3375                                 NUM_BANKS(ADDR_SURF_8_BANK));
3376                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3379                                 NUM_BANKS(ADDR_SURF_8_BANK));
3380                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3381                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3382                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3383                                 NUM_BANKS(ADDR_SURF_8_BANK));
3384                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3385                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3386                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3387                                 NUM_BANKS(ADDR_SURF_8_BANK));
3388                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3390                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3391                                 NUM_BANKS(ADDR_SURF_8_BANK));
3392                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3394                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3395                                 NUM_BANKS(ADDR_SURF_8_BANK));
3396                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3397                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3398                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3399                                 NUM_BANKS(ADDR_SURF_16_BANK));
3400                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3401                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3402                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3403                                 NUM_BANKS(ADDR_SURF_16_BANK));
3404                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3405                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3406                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3407                                  NUM_BANKS(ADDR_SURF_16_BANK));
3408                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3409                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3410                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3411                                  NUM_BANKS(ADDR_SURF_16_BANK));
3412                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3413                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3414                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3415                                  NUM_BANKS(ADDR_SURF_16_BANK));
3416                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3417                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3418                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3419                                  NUM_BANKS(ADDR_SURF_16_BANK));
3420                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3421                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3422                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3423                                  NUM_BANKS(ADDR_SURF_8_BANK));
3424
3425                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3426                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3427                             reg_offset != 23)
3428                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3429
3430                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3431                         if (reg_offset != 7)
3432                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3433
3434                 break;
3435         }
3436 }
3437
3438 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3439                                   u32 se_num, u32 sh_num, u32 instance)
3440 {
3441         u32 data;
3442
3443         if (instance == 0xffffffff)
3444                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3445         else
3446                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3447
3448         if (se_num == 0xffffffff)
3449                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3450         else
3451                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3452
3453         if (sh_num == 0xffffffff)
3454                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3455         else
3456                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3457
3458         WREG32(mmGRBM_GFX_INDEX, data);
3459 }
3460
3461 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3462 {
3463         return (u32)((1ULL << bit_width) - 1);
3464 }
3465
3466 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3467 {
3468         u32 data, mask;
3469
3470         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3471                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3472
3473         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3474
3475         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3476                                        adev->gfx.config.max_sh_per_se);
3477
3478         return (~data) & mask;
3479 }
3480
/*
 * gfx_v8_0_raster_config - look up the per-ASIC raster configuration
 *
 * @adev: amdgpu device pointer
 * @rconf: OR-combined with the ASIC's PA_SC_RASTER_CONFIG value
 * @rconf1: OR-combined with the ASIC's PA_SC_RASTER_CONFIG_1 value
 *
 * Fixed per-ASIC encodings of the RB/packer/shader-engine mapping for the
 * fully populated (unharvested) configuration.  Parts with harvested RBs
 * get an adjusted config from gfx_v8_0_write_harvested_raster_configs().
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3519
/*
 * gfx_v8_0_write_harvested_raster_configs - adapt raster config to harvesting
 *
 * @adev: amdgpu device pointer
 * @raster_config: golden PA_SC_RASTER_CONFIG value for the full part
 * @raster_config_1: golden PA_SC_RASTER_CONFIG_1 value for the full part
 * @rb_mask: bitmap of render backends that are actually active
 * @num_rb: number of RBs a fully populated part would have
 *
 * With harvested (disabled) RBs the golden raster config would route work
 * to missing backends.  This walks the SE-pair / SE / packer / RB hierarchy,
 * repoints each mapping field whose half is empty, and writes a per-SE
 * PA_SC_RASTER_CONFIG using GRBM_GFX_INDEX steering.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* slice the active-RB bitmap into one mask per shader engine */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if one whole SE pair is empty, repoint SE_PAIR_MAP at the other pair */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* one SE of this pair is empty: repoint SE_MAP at the other */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		/* one packer of this SE is empty: repoint PKR_MAP */
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* finally fix up the RB mapping inside each packer */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3628
/*
 * gfx_v8_0_setup_rb - discover active render backends, program raster config
 *
 * @adev: amdgpu device pointer
 *
 * Reads the RB disable state for every SE/SH, builds a global active-RB
 * bitmap, then writes either the golden PA_SC_RASTER_CONFIG values (no
 * harvesting) or a per-SE harvested variant.  The raw per-SE/SH register
 * values are cached in adev->gfx.config for userspace queries.
 * Caller context: takes grbm_idx_mutex while steering GRBM_GFX_INDEX.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			/* pack each SH's bitmap into one global bitmap */
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		/* no harvesting (or nothing enabled): broadcast golden values */
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3685
/**
 * gfx_v8_0_init_compute_vmid - initialize compute VMID SH_MEM registers
 *
 * @adev: amdgpu_device pointer
 *
 * Program the SH_MEM aperture registers for every VMID reserved for
 * compute use (FIRST_COMPUTE_VMID .. LAST_COMPUTE_VMID - 1), selecting
 * the 64-bit HSA address mode and the aperture bases below.
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		/* select the VMID whose SH_MEM registers we are programming */
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* restore VMID 0 selection */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3730
3731 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3732 {
3733         u32 tmp;
3734         int i;
3735
3736         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3737         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3738         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3739         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3740
3741         gfx_v8_0_tiling_mode_table_init(adev);
3742         gfx_v8_0_setup_rb(adev);
3743         gfx_v8_0_get_cu_info(adev);
3744
3745         /* XXX SH_MEM regs */
3746         /* where to put LDS, scratch, GPUVM in FSA64 space */
3747         mutex_lock(&adev->srbm_mutex);
3748         for (i = 0; i < 16; i++) {
3749                 vi_srbm_select(adev, 0, 0, 0, i);
3750                 /* CP and shaders */
3751                 if (i == 0) {
3752                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3753                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3754                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3755                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3756                         WREG32(mmSH_MEM_CONFIG, tmp);
3757                 } else {
3758                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3759                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3760                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3761                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3762                         WREG32(mmSH_MEM_CONFIG, tmp);
3763                 }
3764
3765                 WREG32(mmSH_MEM_APE1_BASE, 1);
3766                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3767                 WREG32(mmSH_MEM_BASES, 0);
3768         }
3769         vi_srbm_select(adev, 0, 0, 0, 0);
3770         mutex_unlock(&adev->srbm_mutex);
3771
3772         gfx_v8_0_init_compute_vmid(adev);
3773
3774         mutex_lock(&adev->grbm_idx_mutex);
3775         /*
3776          * making sure that the following register writes will be broadcasted
3777          * to all the shaders
3778          */
3779         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3780
3781         WREG32(mmPA_SC_FIFO_SIZE,
3782                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3783                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3784                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3785                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3786                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3787                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3788                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3789                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3790         mutex_unlock(&adev->grbm_idx_mutex);
3791
3792 }
3793
3794 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3795 {
3796         u32 i, j, k;
3797         u32 mask;
3798
3799         mutex_lock(&adev->grbm_idx_mutex);
3800         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3801                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3802                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3803                         for (k = 0; k < adev->usec_timeout; k++) {
3804                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3805                                         break;
3806                                 udelay(1);
3807                         }
3808                 }
3809         }
3810         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3811         mutex_unlock(&adev->grbm_idx_mutex);
3812
3813         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3814                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3815                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3816                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3817         for (k = 0; k < adev->usec_timeout; k++) {
3818                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3819                         break;
3820                 udelay(1);
3821         }
3822 }
3823
3824 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3825                                                bool enable)
3826 {
3827         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3828
3829         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3830         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3831         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3832         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3833
3834         WREG32(mmCP_INT_CNTL_RING0, tmp);
3835 }
3836
3837 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3838 {
3839         /* csib */
3840         WREG32(mmRLC_CSIB_ADDR_HI,
3841                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3842         WREG32(mmRLC_CSIB_ADDR_LO,
3843                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3844         WREG32(mmRLC_CSIB_LENGTH,
3845                         adev->gfx.rlc.clear_state_size);
3846 }
3847
3848 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3849                                 int ind_offset,
3850                                 int list_size,
3851                                 int *unique_indices,
3852                                 int *indices_count,
3853                                 int max_indices,
3854                                 int *ind_start_offsets,
3855                                 int *offset_count,
3856                                 int max_offset)
3857 {
3858         int indices;
3859         bool new_entry = true;
3860
3861         for (; ind_offset < list_size; ind_offset++) {
3862
3863                 if (new_entry) {
3864                         new_entry = false;
3865                         ind_start_offsets[*offset_count] = ind_offset;
3866                         *offset_count = *offset_count + 1;
3867                         BUG_ON(*offset_count >= max_offset);
3868                 }
3869
3870                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3871                         new_entry = true;
3872                         continue;
3873                 }
3874
3875                 ind_offset += 2;
3876
3877                 /* look for the matching indice */
3878                 for (indices = 0;
3879                         indices < *indices_count;
3880                         indices++) {
3881                         if (unique_indices[indices] ==
3882                                 register_list_format[ind_offset])
3883                                 break;
3884                 }
3885
3886                 if (indices >= *indices_count) {
3887                         unique_indices[*indices_count] =
3888                                 register_list_format[ind_offset];
3889                         indices = *indices_count;
3890                         *indices_count = *indices_count + 1;
3891                         BUG_ON(*indices_count >= max_indices);
3892                 }
3893
3894                 register_list_format[ind_offset] = indices;
3895         }
3896 }
3897
3898 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3899 {
3900         int i, temp, data;
3901         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3902         int indices_count = 0;
3903         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3904         int offset_count = 0;
3905
3906         int list_size;
3907         unsigned int *register_list_format =
3908                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3909         if (!register_list_format)
3910                 return -ENOMEM;
3911         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3912                         adev->gfx.rlc.reg_list_format_size_bytes);
3913
3914         gfx_v8_0_parse_ind_reg_list(register_list_format,
3915                                 RLC_FormatDirectRegListLength,
3916                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3917                                 unique_indices,
3918                                 &indices_count,
3919                                 sizeof(unique_indices) / sizeof(int),
3920                                 indirect_start_offsets,
3921                                 &offset_count,
3922                                 sizeof(indirect_start_offsets)/sizeof(int));
3923
3924         /* save and restore list */
3925         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3926
3927         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3928         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3929                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3930
3931         /* indirect list */
3932         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3933         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3934                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3935
3936         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3937         list_size = list_size >> 1;
3938         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3939         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3940
3941         /* starting offsets starts */
3942         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3943                 adev->gfx.rlc.starting_offsets_start);
3944         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3945                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3946                                 indirect_start_offsets[i]);
3947
3948         /* unique indices */
3949         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3950         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3951         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3952                 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3953                 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3954         }
3955         kfree(register_list_format);
3956
3957         return 0;
3958 }
3959
/* Turn on the RLC save/restore machine (SRM). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3964
/*
 * gfx_v8_0_init_power_gating - program GFX power-gating delay parameters
 *
 * @adev: amdgpu device pointer
 *
 * Only acts when some form of GFX power gating (PG/SMG/DMG) is supported.
 * The values written are fixed constants (presumably hardware-tuned).
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

		/* power up/down handshake delays */
		data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
		WREG32(mmRLC_PG_DELAY, data);

		WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
		WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
	}
}
3984
/* Enable/disable SMU clock slow-down while the GFX block powers up. */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
3990
/* Enable/disable SMU clock slow-down while the GFX block powers down. */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
3996
3997 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3998 {
3999         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0);
4000 }
4001
4002 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4003 {
4004         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4005                               AMD_PG_SUPPORT_GFX_SMG |
4006                               AMD_PG_SUPPORT_GFX_DMG |
4007                               AMD_PG_SUPPORT_CP |
4008                               AMD_PG_SUPPORT_GDS |
4009                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
4010                 gfx_v8_0_init_csb(adev);
4011                 gfx_v8_0_init_save_restore_list(adev);
4012                 gfx_v8_0_enable_save_restore_machine(adev);
4013
4014                 if ((adev->asic_type == CHIP_CARRIZO) ||
4015                     (adev->asic_type == CHIP_STONEY)) {
4016                         WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4017                         gfx_v8_0_init_power_gating(adev);
4018                         WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4019                         if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4020                                 cz_enable_sck_slow_down_on_power_up(adev, true);
4021                                 cz_enable_sck_slow_down_on_power_down(adev, true);
4022                         } else {
4023                                 cz_enable_sck_slow_down_on_power_up(adev, false);
4024                                 cz_enable_sck_slow_down_on_power_down(adev, false);
4025                         }
4026                         if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4027                                 cz_enable_cp_power_gating(adev, true);
4028                         else
4029                                 cz_enable_cp_power_gating(adev, false);
4030                 } else if (adev->asic_type == CHIP_POLARIS11) {
4031                         gfx_v8_0_init_power_gating(adev);
4032                 }
4033         }
4034 }
4035
4036 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4037 {
4038         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4039
4040         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4041         gfx_v8_0_wait_for_rlc_serdes(adev);
4042 }
4043
4044 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4045 {
4046         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4047         udelay(50);
4048
4049         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4050         udelay(50);
4051 }
4052
4053 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4054 {
4055         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4056
4057         /* carrizo do enable cp interrupt after cp inited */
4058         if (!(adev->flags & AMD_IS_APU))
4059                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4060
4061         udelay(50);
4062 }
4063
4064 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4065 {
4066         const struct rlc_firmware_header_v2_0 *hdr;
4067         const __le32 *fw_data;
4068         unsigned i, fw_size;
4069
4070         if (!adev->gfx.rlc_fw)
4071                 return -EINVAL;
4072
4073         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4074         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4075
4076         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4077                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4078         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4079
4080         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4081         for (i = 0; i < fw_size; i++)
4082                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4083         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4084
4085         return 0;
4086 }
4087
/*
 * Full RLC restart sequence used at hw init/resume:
 *  1. halt the RLC,
 *  2. disable coarse-grain clock gating / light sleep (plus the 3D
 *     variant on Polaris) and power gating,
 *  3. soft-reset the RLC and re-initialize power-gating state,
 *  4. load the RLC microcode (legacy direct load) or verify the SMU
 *     already loaded it,
 *  5. start the RLC again.
 *
 * Returns 0 on success or a negative error code on firmware failure.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10) {
		/* Polaris also has 3D CGCG/CGLS enables in the low bits */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the firmware; just check it finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4131
4132 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4133 {
4134         int i;
4135         u32 tmp = RREG32(mmCP_ME_CNTL);
4136
4137         if (enable) {
4138                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4139                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4140                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4141         } else {
4142                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4143                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4144                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4145                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4146                         adev->gfx.gfx_ring[i].ready = false;
4147         }
4148         WREG32(mmCP_ME_CNTL, tmp);
4149         udelay(50);
4150 }
4151
4152 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4153 {
4154         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4155         const struct gfx_firmware_header_v1_0 *ce_hdr;
4156         const struct gfx_firmware_header_v1_0 *me_hdr;
4157         const __le32 *fw_data;
4158         unsigned i, fw_size;
4159
4160         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4161                 return -EINVAL;
4162
4163         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4164                 adev->gfx.pfp_fw->data;
4165         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4166                 adev->gfx.ce_fw->data;
4167         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4168                 adev->gfx.me_fw->data;
4169
4170         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4171         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4172         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4173
4174         gfx_v8_0_cp_gfx_enable(adev, false);
4175
4176         /* PFP */
4177         fw_data = (const __le32 *)
4178                 (adev->gfx.pfp_fw->data +
4179                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4180         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4181         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4182         for (i = 0; i < fw_size; i++)
4183                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4184         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4185
4186         /* CE */
4187         fw_data = (const __le32 *)
4188                 (adev->gfx.ce_fw->data +
4189                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4190         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4191         WREG32(mmCP_CE_UCODE_ADDR, 0);
4192         for (i = 0; i < fw_size; i++)
4193                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4194         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4195
4196         /* ME */
4197         fw_data = (const __le32 *)
4198                 (adev->gfx.me_fw->data +
4199                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4200         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4201         WREG32(mmCP_ME_RAM_WADDR, 0);
4202         for (i = 0; i < fw_size; i++)
4203                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4204         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4205
4206         return 0;
4207 }
4208
4209 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4210 {
4211         u32 count = 0;
4212         const struct cs_section_def *sect = NULL;
4213         const struct cs_extent_def *ext = NULL;
4214
4215         /* begin clear state */
4216         count += 2;
4217         /* context control state */
4218         count += 3;
4219
4220         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4221                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4222                         if (sect->id == SECT_CONTEXT)
4223                                 count += 2 + ext->reg_count;
4224                         else
4225                                 return 0;
4226                 }
4227         }
4228         /* pa_sc_raster_config/pa_sc_raster_config1 */
4229         count += 4;
4230         /* end clear state */
4231         count += 2;
4232         /* clear state */
4233         count += 2;
4234
4235         return count;
4236 }
4237
/*
 * Prime the gfx ring after enabling the CP: program basic CP config,
 * then emit the clear-state preamble, the golden context-register
 * state, the per-ASIC PA_SC_RASTER_CONFIG values, a CLEAR_STATE packet
 * and the CE partition bases on ring 0.
 *
 * Returns 0 on success, or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 covers the trailing SET_BASE packet for the CE partitions,
	 * which gfx_v8_0_get_csb_size() does not count — keep in sync */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* replay the golden context register state from vi_cs_data */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG/_1 values; NOTE(review): presumably
	 * derived from each chip's RB/SE layout — confirm against the ASIC
	 * documentation before changing */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4329
/*
 * Program the gfx ring-buffer registers (size, read/write pointers,
 * writeback addresses, base address, doorbell) and then start the ring
 * via gfx_v8_0_cp_gfx_start(), finishing with a ring test.
 *
 * Returns 0 on success or the ring-test error code.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers; RPTR_WR_ENA
	 * is set temporarily so the rptr reset takes effect */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* NOTE(review): presumably lets the pointer programming settle
	 * before RPTR_WR_ENA is dropped again — keep the delay */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is a 256-byte-aligned GPU address */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			/* restrict the gfx doorbell aperture to ring 0 */
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4413
4414 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4415 {
4416         int i;
4417
4418         if (enable) {
4419                 WREG32(mmCP_MEC_CNTL, 0);
4420         } else {
4421                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4422                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4423                         adev->gfx.compute_ring[i].ready = false;
4424         }
4425         udelay(50);
4426 }
4427
4428 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4429 {
4430         const struct gfx_firmware_header_v1_0 *mec_hdr;
4431         const __le32 *fw_data;
4432         unsigned i, fw_size;
4433
4434         if (!adev->gfx.mec_fw)
4435                 return -EINVAL;
4436
4437         gfx_v8_0_cp_compute_enable(adev, false);
4438
4439         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4440         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4441
4442         fw_data = (const __le32 *)
4443                 (adev->gfx.mec_fw->data +
4444                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4445         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4446
4447         /* MEC1 */
4448         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4449         for (i = 0; i < fw_size; i++)
4450                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4451         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4452
4453         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4454         if (adev->gfx.mec2_fw) {
4455                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4456
4457                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4458                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4459
4460                 fw_data = (const __le32 *)
4461                         (adev->gfx.mec2_fw->data +
4462                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4463                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4464
4465                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4466                 for (i = 0; i < fw_size; i++)
4467                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4468                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4469         }
4470
4471         return 0;
4472 }
4473
4474 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4475 {
4476         int i, r;
4477
4478         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4479                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4480
4481                 if (ring->mqd_obj) {
4482                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4483                         if (unlikely(r != 0))
4484                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4485
4486                         amdgpu_bo_unpin(ring->mqd_obj);
4487                         amdgpu_bo_unreserve(ring->mqd_obj);
4488
4489                         amdgpu_bo_unref(&ring->mqd_obj);
4490                         ring->mqd_obj = NULL;
4491                 }
4492         }
4493 }
4494
/*
 * Bring up all compute queues: allocate/pin/map each ring's MQD buffer,
 * build the MQD and mirror it into the HQD registers under srbm_mutex
 * (selecting the ring's me/pipe/queue), activate the queue, then enable
 * the MEC and ring-test every compute ring.
 *
 * The register sequence is strictly ordered; do not reorder writes.
 *
 * Returns 0 on success or a negative error code on BO setup failure
 * (after tearing down via gfx_v8_0_cp_compute_fini()).
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the queues.  */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* lazily allocate the MQD bo; it survives suspend/resume */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* all HQD register accesses below are routed to this ring's
		 * me/pipe/queue by the SRBM select */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* 256-byte-aligned EOP buffer address for this queue */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* mirror what the hardware accepted back into the MQD */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active; poll dequeue for up to
		 * usec_timeout microseconds */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10)) {
				/* MEC doorbell aperture: KIQ..MEC_RING7 */
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		/* NOTE(review): 0x53 preload size looks like a hardware
		 * tuning constant — confirm before changing */
		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* sanity-check every compute ring now that the MEC is running */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
4742
4743 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4744 {
4745         int r;
4746
4747         if (!(adev->flags & AMD_IS_APU))
4748                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4749
4750         if (!adev->pp_enabled) {
4751                 if (!adev->firmware.smu_load) {
4752                         /* legacy firmware loading */
4753                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
4754                         if (r)
4755                                 return r;
4756
4757                         r = gfx_v8_0_cp_compute_load_microcode(adev);
4758                         if (r)
4759                                 return r;
4760                 } else {
4761                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4762                                                         AMDGPU_UCODE_ID_CP_CE);
4763                         if (r)
4764                                 return -EINVAL;
4765
4766                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4767                                                         AMDGPU_UCODE_ID_CP_PFP);
4768                         if (r)
4769                                 return -EINVAL;
4770
4771                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4772                                                         AMDGPU_UCODE_ID_CP_ME);
4773                         if (r)
4774                                 return -EINVAL;
4775
4776                         if (adev->asic_type == CHIP_TOPAZ) {
4777                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4778                                 if (r)
4779                                         return r;
4780                         } else {
4781                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4782                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
4783                                 if (r)
4784                                         return -EINVAL;
4785                         }
4786                 }
4787         }
4788
4789         r = gfx_v8_0_cp_gfx_resume(adev);
4790         if (r)
4791                 return r;
4792
4793         r = gfx_v8_0_cp_compute_resume(adev);
4794         if (r)
4795                 return r;
4796
4797         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4798
4799         return 0;
4800 }
4801
/* Enable or disable both command processors (graphics and compute)
 * together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4807
/* IP-block hw_init callback: program golden registers, configure the gfx
 * engine, then bring up the RLC and command processors in that order. */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* RLC must be running before the CP is resumed */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4824
/* IP-block hw_fini callback: release interrupts, stop the engines and
 * undo the powergating enabled in late_init. */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* drop the irq references taken in gfx_v8_0_late_init() */
	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		/* under SR-IOV the VF skips engine teardown entirely --
		 * presumably the host manages CP/RLC state */
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	/* mirror of the GATE request made in late_init */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
4844
/* Suspend is a full hardware teardown for the gfx block. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4851
/* Resume re-runs the full hardware init sequence. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4858
4859 static bool gfx_v8_0_is_idle(void *handle)
4860 {
4861         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4862
4863         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4864                 return false;
4865         else
4866                 return true;
4867 }
4868
4869 static int gfx_v8_0_wait_for_idle(void *handle)
4870 {
4871         unsigned i;
4872         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4873
4874         for (i = 0; i < adev->usec_timeout; i++) {
4875                 if (gfx_v8_0_is_idle(handle))
4876                         return 0;
4877
4878                 udelay(1);
4879         }
4880         return -ETIMEDOUT;
4881 }
4882
/* Decode the GRBM/SRBM status registers and compute which soft-reset bits
 * would be needed to recover the gfx block.  The resulting masks are
 * cached in adev->gfx.{grbm,srbm}_soft_reset for the pre/soft/post reset
 * callbacks.  Returns true when any reset is required. */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		/* any busy pipeline unit -> reset CP + GFX, and GRBM via SRBM */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		/* any stuck CP partition -> reset all three together */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* publish (or clear) the computed masks for the reset callbacks */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
4944
/* If the hardware queue selected by @ring (me/pipe/queue) is active, post
 * a dequeue request and poll until CP_HQD_ACTIVE clears or usec_timeout
 * expires.  NOTE(review): this leaves the srbm selection on @ring's queue
 * and does not restore 0,0,0,0 -- presumably callers reselect; confirm. */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int i;

	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		u32 tmp;
		/* DEQUEUE_REQ = 2: request the queue be drained/deactivated */
		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
				    DEQUEUE_REQ, 2);
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
	}
}
4964
4965 static int gfx_v8_0_pre_soft_reset(void *handle)
4966 {
4967         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4968         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4969
4970         if ((!adev->gfx.grbm_soft_reset) &&
4971             (!adev->gfx.srbm_soft_reset))
4972                 return 0;
4973
4974         grbm_soft_reset = adev->gfx.grbm_soft_reset;
4975         srbm_soft_reset = adev->gfx.srbm_soft_reset;
4976
4977         /* stop the rlc */
4978         gfx_v8_0_rlc_stop(adev);
4979
4980         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4981             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4982                 /* Disable GFX parsing/prefetching */
4983                 gfx_v8_0_cp_gfx_enable(adev, false);
4984
4985         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4986             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
4987             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
4988             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
4989                 int i;
4990
4991                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4992                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4993
4994                         gfx_v8_0_inactive_hqd(adev, ring);
4995                 }
4996                 /* Disable MEC parsing/prefetching */
4997                 gfx_v8_0_cp_compute_enable(adev, false);
4998         }
4999
5000        return 0;
5001 }
5002
/* Apply the soft resets computed by gfx_v8_0_check_soft_reset().  The
 * memory-controller gfx interface is stalled (GMCON_DEBUG) around the
 * reset pulse; each reset register is asserted, held ~50us, then
 * deasserted. */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		/* stall/clear the gfx path in the memory controller while
		 * the engines reset */
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET); /* readback, presumably posts the write -- confirm */

		udelay(50);

		/* release the reset bits */
		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		/* release the reset bits */
		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		/* un-stall the memory controller gfx path */
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5064
/* Clear the dequeue request and ring pointers of the HQD selected by
 * @ring so the queue comes back clean after a soft reset, then restore
 * the default srbm selection. */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
}
5074
5075 static int gfx_v8_0_post_soft_reset(void *handle)
5076 {
5077         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5078         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5079
5080         if ((!adev->gfx.grbm_soft_reset) &&
5081             (!adev->gfx.srbm_soft_reset))
5082                 return 0;
5083
5084         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5085         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5086
5087         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5088             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5089                 gfx_v8_0_cp_gfx_resume(adev);
5090
5091         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5092             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5093             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5094             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5095                 int i;
5096
5097                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5098                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5099
5100                         gfx_v8_0_init_hqd(adev, ring);
5101                 }
5102                 gfx_v8_0_cp_compute_resume(adev);
5103         }
5104         gfx_v8_0_rlc_start(adev);
5105
5106         return 0;
5107 }
5108
5109 /**
5110  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5111  *
5112  * @adev: amdgpu_device pointer
5113  *
5114  * Fetches a GPU clock counter snapshot.
5115  * Returns the 64 bit clock counter snapshot.
5116  */
5117 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5118 {
5119         uint64_t clock;
5120
5121         mutex_lock(&adev->gfx.gpu_clock_mutex);
5122         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5123         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5124                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5125         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5126         return clock;
5127 }
5128
5129 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5130                                           uint32_t vmid,
5131                                           uint32_t gds_base, uint32_t gds_size,
5132                                           uint32_t gws_base, uint32_t gws_size,
5133                                           uint32_t oa_base, uint32_t oa_size)
5134 {
5135         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5136         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5137
5138         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5139         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5140
5141         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5142         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5143
5144         /* GDS Base */
5145         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5146         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5147                                 WRITE_DATA_DST_SEL(0)));
5148         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5149         amdgpu_ring_write(ring, 0);
5150         amdgpu_ring_write(ring, gds_base);
5151
5152         /* GDS Size */
5153         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5154         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5155                                 WRITE_DATA_DST_SEL(0)));
5156         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5157         amdgpu_ring_write(ring, 0);
5158         amdgpu_ring_write(ring, gds_size);
5159
5160         /* GWS */
5161         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5162         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5163                                 WRITE_DATA_DST_SEL(0)));
5164         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5165         amdgpu_ring_write(ring, 0);
5166         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5167
5168         /* OA */
5169         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5170         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5171                                 WRITE_DATA_DST_SEL(0)));
5172         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5173         amdgpu_ring_write(ring, 0);
5174         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5175 }
5176
/* Read one indirect SQ register @address for the given @simd/@wave via
 * the SQ_IND_INDEX / SQ_IND_DATA register pair. */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}
5186
5187 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5188                            uint32_t wave, uint32_t thread,
5189                            uint32_t regno, uint32_t num, uint32_t *out)
5190 {
5191         WREG32(mmSQ_IND_INDEX,
5192                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5193                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5194                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5195                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5196                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5197                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5198         while (num--)
5199                 *(out++) = RREG32(mmSQ_IND_DATA);
5200 }
5201
5202 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5203 {
5204         /* type 0 wave data */
5205         dst[(*no_fields)++] = 0;
5206         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5207         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5208         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5209         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5210         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5211         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5212         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5213         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5214         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5215         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5216         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5217         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5218         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5219         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5220         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5221         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5222         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5223         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5224 }
5225
5226 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5227                                      uint32_t wave, uint32_t start,
5228                                      uint32_t size, uint32_t *dst)
5229 {
5230         wave_read_regs(
5231                 adev, simd, wave, 0,
5232                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5233 }
5234
5235
/* GFX helper callbacks exported to the rest of the driver via
 * adev->gfx.funcs (clock queries, se/sh selection, wave dumps). */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5242
/* IP-block early_init callback: set ring counts and install all function
 * tables before any hardware is touched.  Always succeeds. */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5257
/* IP-block late_init callback: take the privileged register/instruction
 * fault interrupt references (released in hw_fini), run the EDC GPR
 * workarounds, then enable gfx powergating. */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	/* counterpart of the UNGATE request in hw_fini */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5281
5282 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5283                                                        bool enable)
5284 {
5285         if (adev->asic_type == CHIP_POLARIS11)
5286                 /* Send msg to SMU via Powerplay */
5287                 amdgpu_set_powergating_state(adev,
5288                                              AMD_IP_BLOCK_TYPE_SMC,
5289                                              enable ?
5290                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5291
5292         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5293 }
5294
5295 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5296                                                         bool enable)
5297 {
5298         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5299 }
5300
5301 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5302                 bool enable)
5303 {
5304         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5305 }
5306
5307 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5308                                           bool enable)
5309 {
5310         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5311 }
5312
5313 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5314                                                 bool enable)
5315 {
5316         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5317
5318         /* Read any GFX register to wake up GFX. */
5319         if (!enable)
5320                 RREG32(mmDB_RENDER_CONTROL);
5321 }
5322
5323 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5324                                           bool enable)
5325 {
5326         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5327                 cz_enable_gfx_cg_power_gating(adev, true);
5328                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5329                         cz_enable_gfx_pipeline_power_gating(adev, true);
5330         } else {
5331                 cz_enable_gfx_cg_power_gating(adev, false);
5332                 cz_enable_gfx_pipeline_power_gating(adev, false);
5333         }
5334 }
5335
5336 static int gfx_v8_0_set_powergating_state(void *handle,
5337                                           enum amd_powergating_state state)
5338 {
5339         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5340         bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5341
5342         if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5343                 return 0;
5344
5345         switch (adev->asic_type) {
5346         case CHIP_CARRIZO:
5347         case CHIP_STONEY:
5348                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5349                         cz_update_gfx_cg_power_gating(adev, enable);
5350
5351                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5352                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5353                 else
5354                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5355
5356                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5357                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5358                 else
5359                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5360                 break;
5361         case CHIP_POLARIS11:
5362                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5363                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5364                 else
5365                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5366
5367                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5368                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5369                 else
5370                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5371
5372                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5373                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5374                 else
5375                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5376                 break;
5377         default:
5378                 break;
5379         }
5380
5381         return 0;
5382 }
5383
5384 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5385                                      uint32_t reg_addr, uint32_t cmd)
5386 {
5387         uint32_t data;
5388
5389         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5390
5391         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5392         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5393
5394         data = RREG32(mmRLC_SERDES_WR_CTRL);
5395         if (adev->asic_type == CHIP_STONEY)
5396                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5397                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5398                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5399                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5400                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5401                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5402                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5403                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5404                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5405         else
5406                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5407                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5408                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5409                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5410                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5411                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5412                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5413                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5414                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5415                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5416                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5417         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5418                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5419                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5420                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5421
5422         WREG32(mmRLC_SERDES_WR_CTRL, data);
5423 }
5424
/* RLC safe-mode handshake values written to RLC_GPR_REG2.  The REQ and
 * MESSAGE field layout below is defined locally because it is not part of
 * the generated register headers included above. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5431
/* Request RLC safe mode (CZ/ST flavour, via the RLC_GPR_REG2 mailbox) so
 * clockgating/powergating state can be reprogrammed safely.  No-op when
 * the RLC f32 core is disabled or no relevant CG/PG feature is enabled. */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* no RLC running -> nothing to handshake with */
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* post the ENTER message with the REQ bit set */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait for gfx clocks and power to both report on */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge by clearing REQ */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5467
/*
 * Release RLC safe mode on CZ/ST through the RLC_GPR_REG2 mailbox.
 *
 * Mirror of cz_enter_rlc_safe_mode(): no-op when the RLC F32 core is
 * halted or no CG/PG feature is enabled.  The REQ-cleared poll at the
 * bottom runs unconditionally; when nothing was written it completes on
 * the first iteration.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	/* Nothing to do if the RLC firmware engine is not running. */
	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* Wait for the RLC to ack the request (REQ bit cleared). */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
			break;
		udelay(1);
	}
}
5493
/*
 * Request RLC safe mode on Iceland-style ASICs via the dedicated
 * RLC_SAFE_MODE register (CMD handshake bit + MESSAGE field, message 1
 * meaning "enter").  Skipped when the RLC F32 core is halted or no GFX
 * clock-gating feature is enabled.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	/* Nothing to do if the RLC firmware engine is not running. */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* Wait for both GFX clocks and GFX power to report "on". */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* Wait for the RLC to ack the request (CMD bit cleared). */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5527
/*
 * Release RLC safe mode on Iceland-style ASICs via RLC_SAFE_MODE
 * (message field cleared = "exit").  Only issues the command when we
 * actually entered safe mode; the CMD-cleared poll at the bottom runs
 * unconditionally and completes immediately when nothing was written.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	/* Nothing to do if the RLC firmware engine is not running. */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* Wait for the RLC to ack the request (CMD bit cleared). */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5552
/* Safe-mode stub for ASICs that need no RLC handshake; just track state. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
5557
/* Safe-mode stub for ASICs that need no RLC handshake; just track state. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5562
/* RLC safe-mode hooks for CZ/ST (RLC_GPR_REG2 mailbox protocol). */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};
5567
/* RLC safe-mode hooks using the RLC_SAFE_MODE register (Iceland etc.). */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5572
/* No-op RLC safe-mode hooks for ASICs that do not need the handshake. */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5577
/*
 * Enable/disable medium-grain clock gating (MGCG), memory light sleep
 * (MGLS) for RLC and CP, and CGTS tree-shade gating.  The whole sequence
 * runs under RLC safe mode; the numbered steps below are order-critical
 * (override clears, serdes commands and serdes idle waits must interleave
 * exactly as written).
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: stop overriding (i.e. allow)
		 * MGCG for CPF/RLC; APUs keep the GRBM override set. */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			/* 0x96: on-monitor add value - TODO confirm meaning
			 * against the CGTS register spec. */
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* Disable path: reverse order - set overrides first, then
		 * turn off light sleep and tree-shade gating. */
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5681
/*
 * Enable/disable coarse-grain clock gating (CGCG) and coarse-grain light
 * sleep (CGLS) via RLC_CGCG_CGLS_CTRL and the serdes BPM commands.  Runs
 * under RLC safe mode; the interleaving of override writes, serdes idle
 * waits and serdes commands is order-critical.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - stop overriding CGCG in RLC_CGTT_MGCG_OVERRIDE */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* and stop overriding CGLS as well */
			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5772 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5773                                             bool enable)
5774 {
5775         if (enable) {
5776                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5777                  * ===  MGCG + MGLS + TS(CG/LS) ===
5778                  */
5779                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5780                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5781         } else {
5782                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5783                  * ===  CGCG + CGLS ===
5784                  */
5785                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5786                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5787         }
5788         return 0;
5789 }
5790
5791 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5792                                           enum amd_clockgating_state state)
5793 {
5794         uint32_t msg_id, pp_state;
5795         void *pp_handle = adev->powerplay.pp_handle;
5796
5797         if (state == AMD_CG_STATE_UNGATE)
5798                 pp_state = 0;
5799         else
5800                 pp_state = PP_STATE_CG | PP_STATE_LS;
5801
5802         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5803                         PP_BLOCK_GFX_CG,
5804                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5805                         pp_state);
5806         amd_set_clockgating_by_smu(pp_handle, msg_id);
5807
5808         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5809                         PP_BLOCK_GFX_MG,
5810                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5811                         pp_state);
5812         amd_set_clockgating_by_smu(pp_handle, msg_id);
5813
5814         return 0;
5815 }
5816
5817 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5818                                           enum amd_clockgating_state state)
5819 {
5820         uint32_t msg_id, pp_state;
5821         void *pp_handle = adev->powerplay.pp_handle;
5822
5823         if (state == AMD_CG_STATE_UNGATE)
5824                 pp_state = 0;
5825         else
5826                 pp_state = PP_STATE_CG | PP_STATE_LS;
5827
5828         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5829                         PP_BLOCK_GFX_CG,
5830                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5831                         pp_state);
5832         amd_set_clockgating_by_smu(pp_handle, msg_id);
5833
5834         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5835                         PP_BLOCK_GFX_3D,
5836                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5837                         pp_state);
5838         amd_set_clockgating_by_smu(pp_handle, msg_id);
5839
5840         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5841                         PP_BLOCK_GFX_MG,
5842                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5843                         pp_state);
5844         amd_set_clockgating_by_smu(pp_handle, msg_id);
5845
5846         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5847                         PP_BLOCK_GFX_RLC,
5848                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5849                         pp_state);
5850         amd_set_clockgating_by_smu(pp_handle, msg_id);
5851
5852         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5853                         PP_BLOCK_GFX_CP,
5854                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5855                         pp_state);
5856         amd_set_clockgating_by_smu(pp_handle, msg_id);
5857
5858         return 0;
5859 }
5860
5861 static int gfx_v8_0_set_clockgating_state(void *handle,
5862                                           enum amd_clockgating_state state)
5863 {
5864         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5865
5866         switch (adev->asic_type) {
5867         case CHIP_FIJI:
5868         case CHIP_CARRIZO:
5869         case CHIP_STONEY:
5870                 gfx_v8_0_update_gfx_clock_gating(adev,
5871                                                  state == AMD_CG_STATE_GATE ? true : false);
5872                 break;
5873         case CHIP_TONGA:
5874                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5875                 break;
5876         case CHIP_POLARIS10:
5877         case CHIP_POLARIS11:
5878                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5879                 break;
5880         default:
5881                 break;
5882         }
5883         return 0;
5884 }
5885
/* Read the ring's read pointer from its write-back slot. */
static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}
5890
5891 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5892 {
5893         struct amdgpu_device *adev = ring->adev;
5894
5895         if (ring->use_doorbell)
5896                 /* XXX check if swapping is necessary on BE */
5897                 return ring->adev->wb.wb[ring->wptr_offs];
5898         else
5899                 return RREG32(mmCP_RB0_WPTR);
5900 }
5901
/*
 * Publish the GFX ring's write pointer: via write-back slot + doorbell
 * when doorbells are in use, otherwise by writing CP_RB0_WPTR directly.
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = ring->wptr;
		WDOORBELL32(ring->doorbell_index, ring->wptr);
	} else {
		WREG32(mmCP_RB0_WPTR, ring->wptr);
		/* read back presumably to flush/confirm the posted
		 * register write - TODO confirm */
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
5915
/*
 * Emit an HDP flush as a WAIT_REG_MEM packet in write-then-wait mode:
 * the CP writes GPU_HDP_FLUSH_REQ and polls GPU_HDP_FLUSH_DONE until the
 * per-client bit matches.  The ref/mask bit is CP0 for the GFX ring and
 * CP2/CP6 shifted by pipe for MEC1/MEC2 compute rings; unknown MEs emit
 * nothing.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
5947
/*
 * Emit a VGT flush: a VS partial flush event followed by the VGT_FLUSH
 * event, each as an EVENT_WRITE packet.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
5958
5959
/*
 * Emit an HDP invalidate by writing 1 to HDP_DEBUG0 through a confirmed
 * WRITE_DATA packet (presumably writing this register triggers the HDP
 * cache invalidation on VI - TODO confirm against the HDP spec).
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
5971
/*
 * Emit an indirect buffer on the GFX ring.  CE IBs (AMDGPU_IB_FLAG_CE)
 * use the INDIRECT_BUFFER_CONST variant; the VMID is packed into bits
 * 24+ of the control word alongside the IB size in dwords.  The IB GPU
 * address must be 4-byte aligned (low two bits are masked off).
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
5994
/*
 * Emit an indirect buffer on a compute ring.  Same packet layout as the
 * GFX variant but always INDIRECT_BUFFER and with the VALID bit set in
 * the control word; VMID goes into bits 24+.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6010
/*
 * Emit a GFX fence: an EVENT_WRITE_EOP that flushes/writes back the TC
 * and TCL1 caches, then writes the sequence number to 'addr' (64-bit or
 * 32-bit per AMDGPU_FENCE_FLAG_64BIT) and optionally raises an interrupt
 * (AMDGPU_FENCE_FLAG_INT).
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc); /* must be 4-byte aligned */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6031
/*
 * Emit a pipeline sync: WAIT_REG_MEM polling the ring's fence memory
 * until it equals the latest emitted sequence number, executed by the
 * PFP on GFX rings and the ME on compute rings.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	/* NOTE(review): & 0xffffffff on a u32 is a no-op mask */
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6048
/*
 * Emit a VM TLB flush for the given VMID: write the new page-directory
 * base (contexts 0-7 and 8-15 live in separate register banks), request
 * an invalidate for that VMID, wait for it to complete, and on GFX rings
 * resynchronize the PFP with the ME plus 128 NOPs so the CE cannot touch
 * the VM before the flush finishes.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	/* update the per-VMID page table base address */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
		amdgpu_ring_insert_nop(ring, 128);
	}
}
6097
/* Read a compute ring's write pointer from its write-back slot. */
static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}
6102
/*
 * Publish a compute ring's write pointer: update the write-back slot,
 * then ring the doorbell (compute rings always use doorbells here).
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
6111
/*
 * Emit a compute fence: RELEASE_MEM flushes/writes back the TC and TCL1
 * caches, then writes the sequence number (64-bit or 32-bit per
 * AMDGPU_FENCE_FLAG_64BIT) and optionally raises an interrupt
 * (AMDGPU_FENCE_FLAG_INT).  Compute uses RELEASE_MEM instead of the GFX
 * EVENT_WRITE_EOP packet.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc); /* must be 4-byte aligned */
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6132
/* Emit a SWITCH_BUFFER packet (flips the CE/DE buffer). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6138
/*
 * Emit a CONTEXT_CONTROL packet.  dw2 selects which state groups the CP
 * (re)loads; on a context switch a VGT flush is emitted first and the
 * full set of load bits is requested, otherwise only CE RAM is reloaded
 * when a preamble IB is present for the first time.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6168
/* Enable or disable the EOP timestamp interrupt for the gfx ring (ring 0). */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6175
6176 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6177                                                      int me, int pipe,
6178                                                      enum amdgpu_interrupt_state state)
6179 {
6180         /*
6181          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6182          * handles the setting of interrupts for this specific pipe. All other
6183          * pipes' interrupts are set by amdkfd.
6184          */
6185
6186         if (me == 1) {
6187                 switch (pipe) {
6188                 case 0:
6189                         break;
6190                 default:
6191                         DRM_DEBUG("invalid pipe %d\n", pipe);
6192                         return;
6193                 }
6194         } else {
6195                 DRM_DEBUG("invalid me %d\n", me);
6196                 return;
6197         }
6198
6199         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6200                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6201 }
6202
/* Enable or disable the privileged-register-access fault interrupt. */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6213
/* Enable or disable the privileged-instruction fault interrupt. */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6224
6225 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6226                                             struct amdgpu_irq_src *src,
6227                                             unsigned type,
6228                                             enum amdgpu_interrupt_state state)
6229 {
6230         switch (type) {
6231         case AMDGPU_CP_IRQ_GFX_EOP:
6232                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6233                 break;
6234         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6235                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6236                 break;
6237         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6238                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6239                 break;
6240         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6241                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6242                 break;
6243         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6244                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6245                 break;
6246         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6247                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6248                 break;
6249         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6250                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6251                 break;
6252         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6253                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6254                 break;
6255         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6256                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6257                 break;
6258         default:
6259                 break;
6260         }
6261         return 0;
6262 }
6263
6264 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6265                             struct amdgpu_irq_src *source,
6266                             struct amdgpu_iv_entry *entry)
6267 {
6268         int i;
6269         u8 me_id, pipe_id, queue_id;
6270         struct amdgpu_ring *ring;
6271
6272         DRM_DEBUG("IH: CP EOP\n");
6273         me_id = (entry->ring_id & 0x0c) >> 2;
6274         pipe_id = (entry->ring_id & 0x03) >> 0;
6275         queue_id = (entry->ring_id & 0x70) >> 4;
6276
6277         switch (me_id) {
6278         case 0:
6279                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6280                 break;
6281         case 1:
6282         case 2:
6283                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6284                         ring = &adev->gfx.compute_ring[i];
6285                         /* Per-queue interrupt is supported for MEC starting from VI.
6286                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6287                           */
6288                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6289                                 amdgpu_fence_process(ring);
6290                 }
6291                 break;
6292         }
6293         return 0;
6294 }
6295
/* Privileged-register-access fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6304
/* Privileged-instruction fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6313
/* IP-level callbacks (init/teardown, suspend/resume, soft reset, and
 * clock/power gating) registered with the amdgpu IP block framework.
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6333
/* Ring callbacks for the gfx ring. emit_frame_size is the worst-case
 * number of dwords a frame can add to the ring; the per-item comments
 * name the emit function each count accounts for.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
		2 + /* gfx_v8_ring_emit_sb */
		3 + 4, /* gfx_v8_ring_emit_cntxcntl including vgt flush */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
};
6365
/* Ring callbacks for the compute rings; same layout as the gfx table but
 * with compute-specific wptr handling, fence emission and frame sizing
 * (no switch-buffer or context-control hooks).
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6393
6394 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6395 {
6396         int i;
6397
6398         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6399                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6400
6401         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6402                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6403 }
6404
/* IRQ source callbacks for CP end-of-pipe interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6409
/* IRQ source callbacks for privileged-register-access faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6414
/* IRQ source callbacks for privileged-instruction faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6419
/* Register the GFX v8 IRQ source callback tables with the device. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
6431
6432 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6433 {
6434         switch (adev->asic_type) {
6435         case CHIP_TOPAZ:
6436                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6437                 break;
6438         case CHIP_STONEY:
6439         case CHIP_CARRIZO:
6440                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6441                 break;
6442         default:
6443                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6444                 break;
6445         }
6446 }
6447
6448 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6449 {
6450         /* init asci gds info */
6451         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6452         adev->gds.gws.total_size = 64;
6453         adev->gds.oa.total_size = 16;
6454
6455         if (adev->gds.mem.total_size == 64 * 1024) {
6456                 adev->gds.mem.gfx_partition_size = 4096;
6457                 adev->gds.mem.cs_partition_size = 4096;
6458
6459                 adev->gds.gws.gfx_partition_size = 4;
6460                 adev->gds.gws.cs_partition_size = 4;
6461
6462                 adev->gds.oa.gfx_partition_size = 4;
6463                 adev->gds.oa.cs_partition_size = 1;
6464         } else {
6465                 adev->gds.mem.gfx_partition_size = 1024;
6466                 adev->gds.mem.cs_partition_size = 1024;
6467
6468                 adev->gds.gws.gfx_partition_size = 16;
6469                 adev->gds.gws.cs_partition_size = 16;
6470
6471                 adev->gds.oa.gfx_partition_size = 4;
6472                 adev->gds.oa.cs_partition_size = 4;
6473         }
6474 }
6475
6476 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6477                                                  u32 bitmap)
6478 {
6479         u32 data;
6480
6481         if (!bitmap)
6482                 return;
6483
6484         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6485         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6486
6487         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6488 }
6489
6490 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6491 {
6492         u32 data, mask;
6493
6494         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6495                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6496
6497         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6498
6499         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6500 }
6501
6502 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6503 {
6504         int i, j, k, counter, active_cu_number = 0;
6505         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6506         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6507         unsigned disable_masks[4 * 2];
6508
6509         memset(cu_info, 0, sizeof(*cu_info));
6510
6511         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6512
6513         mutex_lock(&adev->grbm_idx_mutex);
6514         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6515                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6516                         mask = 1;
6517                         ao_bitmap = 0;
6518                         counter = 0;
6519                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6520                         if (i < 4 && j < 2)
6521                                 gfx_v8_0_set_user_cu_inactive_bitmap(
6522                                         adev, disable_masks[i * 2 + j]);
6523                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6524                         cu_info->bitmap[i][j] = bitmap;
6525
6526                         for (k = 0; k < 16; k ++) {
6527                                 if (bitmap & mask) {
6528                                         if (counter < 2)
6529                                                 ao_bitmap |= mask;
6530                                         counter ++;
6531                                 }
6532                                 mask <<= 1;
6533                         }
6534                         active_cu_number += counter;
6535                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6536                 }
6537         }
6538         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6539         mutex_unlock(&adev->grbm_idx_mutex);
6540
6541         cu_info->number = active_cu_number;
6542         cu_info->ao_cu_mask = ao_cu_mask;
6543 }
6544
/* GFX IP block descriptor, version 8.0 — exported for the per-ASIC
 * IP block lists.
 */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
6553
/* GFX IP block descriptor, version 8.1 — shares the 8.0 callback table. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};