Merge branch 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "vi.h"
29 #include "vi_structs.h"
30 #include "vid.h"
31 #include "amdgpu_ucode.h"
32 #include "amdgpu_atombios.h"
33 #include "atombios_i2c.h"
34 #include "clearstate_vi.h"
35
36 #include "gmc/gmc_8_2_d.h"
37 #include "gmc/gmc_8_2_sh_mask.h"
38
39 #include "oss/oss_3_0_d.h"
40 #include "oss/oss_3_0_sh_mask.h"
41
42 #include "bif/bif_5_0_d.h"
43 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
54 #define GFX8_NUM_GFX_RINGS     1
55 #define GFX8_MEC_HPD_SIZE 2048
56
57 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
59 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
60 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
61
62 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
63 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
64 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
65 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
66 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
67 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
68 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
69 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
70 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
71
72 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
73 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
74 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
76 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
77 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
78
79 /* BPM SERDES CMD */
80 #define SET_BPM_SERDES_CMD    1
81 #define CLE_BPM_SERDES_CMD    0
82
83 /* BPM Register Address*/
/*
 * Indices of the BPM (per-CU power management) registers that are
 * accessed through the SERDES command interface (see SET_BPM_SERDES_CMD /
 * CLE_BPM_SERDES_CMD above).  NOTE(review): exact register layout is
 * firmware-defined; values here are indices, not MMIO offsets.
 */
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX            /* Number of BPM registers (sentinel) */
};
92
93 #define RLC_FormatDirectRegListLength        14
94
95 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
101
102 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
107
108 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
114
115 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
120
121 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
127
128 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
129 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
130 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
131 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
139
140 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
141 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
143 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
145 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
151
152 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
153 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
155 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
157 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
163
164 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
165 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
166 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
167 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
168 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
169 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
170
/*
 * Per-VMID GDS register offsets, indexed by VMID (0..15).  Each entry
 * holds the MMIO offsets of that VMID's GDS base/size and GWS/OA
 * registers, as the register names show.  NOTE(review): field names of
 * struct amdgpu_gds_reg_offset are declared elsewhere — order assumed
 * to be {mem_base, mem_size, gws, oa}; confirm against amdgpu_gfx.h.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
190
/*
 * Tonga (rev A11) "golden" register fixups.  Flat array of
 * {register, AND-mask, value} triplets: for each triplet only the bits
 * set in the mask are replaced with the corresponding bits of value
 * (mask 0xffffffff writes the whole register).  NOTE(review): assumed
 * to be consumed by amdgpu_device_program_register_sequence() during
 * golden-register init; the caller is outside this chunk — confirm.
 */
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
210
/*
 * Tonga golden settings applied to all revisions (naming suggests
 * "common to all" — confirm against the init path).  Same
 * {register, AND-mask, value} triplet format as the per-rev tables.
 */
static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
222
/*
 * Tonga medium-grain / coarse-grain clock-gating init sequence:
 * {register, AND-mask, value} triplets.  Writes mmGRBM_GFX_INDEX with
 * 0xe0000000 before the per-block CGTT registers — NOTE(review):
 * presumably the broadcast-to-all-SE/SH/instance encoding; confirm
 * against GRBM_GFX_INDEX bit definitions in the gca sh_mask header.
 * The per-CU CGTS_* stanzas repeat the same five-register pattern for
 * CU0..CU7.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
301
/*
 * VegaM (rev A11) golden register fixups; {register, AND-mask, value}
 * triplets (see golden_settings_tonga_a11 for the format).
 */
static const u32 golden_settings_vegam_a11[] =
{
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
322
/*
 * VegaM golden settings common to all revisions;
 * {register, AND-mask, value} triplets.
 */
static const u32 vegam_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
332
/*
 * Polaris11 (rev A11) golden register fixups;
 * {register, AND-mask, value} triplets.
 */
static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
353
/*
 * Polaris11 golden settings common to all revisions; note
 * GB_ADDR_CONFIG matches POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002).
 */
static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
363
/*
 * Polaris10 (rev A11) golden register fixups;
 * {register, AND-mask, value} triplets.
 */
static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
384
/*
 * Polaris10 golden settings common to all revisions;
 * {register, AND-mask, value} triplets.
 */
static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
396
/*
 * Fiji golden settings common to all revisions;
 * {register, AND-mask, value} triplets.  GRBM_GFX_INDEX is written
 * twice (before and after the SPI reserve registers) in this sequence —
 * preserved exactly as programmed by the hardware team.
 */
static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
410
/*
 * Fiji (rev A10) golden register fixups;
 * {register, AND-mask, value} triplets.
 */
static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
425
/*
 * Fiji MGCG/CGCG clock-gating init sequence; {register, AND-mask, value}
 * triplets.  Unlike the Tonga/Iceland/CZ tables this one carries no
 * per-CU CGTS_CUn_* stanzas.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
464
/*
 * Iceland/Topaz (rev A11) golden register fixups;
 * {register, AND-mask, value} triplets.
 */
static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
484
/*
 * Iceland/Topaz golden settings common to all revisions; note
 * GB_ADDR_CONFIG matches TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
496
/*
 * Iceland/Topaz MGCG/CGCG clock-gating init sequence;
 * {register, AND-mask, value} triplets.  Per-CU CGTS_CUn_* stanzas
 * cover CU0..CU5 only (smaller part), and CU0/CU4 use 0x0f840f87 for
 * TA_SQC where the other ASIC tables use 0x00040007 — hardware-team
 * supplied values, preserved verbatim.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
564
/*
 * Carrizo (rev A11) golden register fixups;
 * {register, AND-mask, value} triplets.
 */
static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
580
/*
 * Carrizo golden settings common to all revisions; note
 * GB_ADDR_CONFIG matches CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
592
593 static const u32 cz_mgcg_cgcg_init[] =
594 {
595         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
596         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
597         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
598         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
599         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
600         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
601         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
602         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
604         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
606         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
608         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
609         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
610         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
611         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
613         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
614         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
615         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
616         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
617         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
618         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
619         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
620         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
621         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
622         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
623         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
624         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
625         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
626         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
627         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
628         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
629         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
630         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
631         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
632         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
633         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
634         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
635         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
636         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
637         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
638         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
639         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
640         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
641         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
642         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
643         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
644         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
645         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
646         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
647         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
648         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
649         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
650         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
651         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
652         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
653         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
654         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
655         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
656         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
657         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
658         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
659         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
660         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
661         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
662         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
663         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
664         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
665         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
666         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
667         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
668         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
669         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
670 };
671
/* Stoney (a11) golden register settings.
 * Entries are {register offset, mask, value} triplets consumed by
 * amdgpu_device_program_register_sequence() in
 * gfx_v8_0_init_golden_registers(). Values come from the hardware team;
 * do not reorder or "clean up" individual entries.
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
685
/* Stoney common golden registers ({register, mask, value} triplets),
 * programmed after the mgcg/cgcg and a11 tables in
 * gfx_v8_0_init_golden_registers().
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
697
/* Stoney clockgating (MGCG/CGCG) init values ({register, mask, value}
 * triplets); programmed first for CHIP_STONEY in
 * gfx_v8_0_init_golden_registers().
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
706
707 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
708 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
709 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
710 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
711 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
712 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
713 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
714 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
715
/**
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 * @adev: amdgpu device pointer
 *
 * Applies the hardware-team-provided register tables for the detected ASIC.
 * The tables are programmed in a fixed order (clockgating init, then the
 * a11 settings, then the common table where present); do not reorder the
 * calls. Unknown ASIC types are silently ignored.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		/* VegaM has no separate mgcg/cgcg init table here. */
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* Polaris11 and Polaris12 share the same golden tables. */
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		/* NOTE(review): SMC-side ACLK control setup; exact meaning of
		 * 0x1C is board-data defined — confirm against SMU docs. */
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific quirk: for three known Polaris10 (rev 0xc7)
		 * SKUs, issue extra I2C transactions at init — presumably to
		 * program an external clock/voltage part; verify with the
		 * board vendors' data before extending this list. */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
812
813 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
814 {
815         adev->gfx.scratch.num_reg = 8;
816         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
817         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
818 }
819
820 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
821 {
822         struct amdgpu_device *adev = ring->adev;
823         uint32_t scratch;
824         uint32_t tmp = 0;
825         unsigned i;
826         int r;
827
828         r = amdgpu_gfx_scratch_get(adev, &scratch);
829         if (r) {
830                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
831                 return r;
832         }
833         WREG32(scratch, 0xCAFEDEAD);
834         r = amdgpu_ring_alloc(ring, 3);
835         if (r) {
836                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
837                           ring->idx, r);
838                 amdgpu_gfx_scratch_free(adev, scratch);
839                 return r;
840         }
841         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
842         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
843         amdgpu_ring_write(ring, 0xDEADBEEF);
844         amdgpu_ring_commit(ring);
845
846         for (i = 0; i < adev->usec_timeout; i++) {
847                 tmp = RREG32(scratch);
848                 if (tmp == 0xDEADBEEF)
849                         break;
850                 DRM_UDELAY(1);
851         }
852         if (i < adev->usec_timeout) {
853                 DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
854                          ring->idx, i);
855         } else {
856                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
857                           ring->idx, scratch, tmp);
858                 r = -EINVAL;
859         }
860         amdgpu_gfx_scratch_free(adev, scratch);
861         return r;
862 }
863
864 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
865 {
866         struct amdgpu_device *adev = ring->adev;
867         struct amdgpu_ib ib;
868         struct dma_fence *f = NULL;
869         uint32_t scratch;
870         uint32_t tmp = 0;
871         long r;
872
873         r = amdgpu_gfx_scratch_get(adev, &scratch);
874         if (r) {
875                 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
876                 return r;
877         }
878         WREG32(scratch, 0xCAFEDEAD);
879         memset(&ib, 0, sizeof(ib));
880         r = amdgpu_ib_get(adev, NULL, 256, &ib);
881         if (r) {
882                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
883                 goto err1;
884         }
885         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
886         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
887         ib.ptr[2] = 0xDEADBEEF;
888         ib.length_dw = 3;
889
890         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
891         if (r)
892                 goto err2;
893
894         r = dma_fence_wait_timeout(f, false, timeout);
895         if (r == 0) {
896                 DRM_ERROR("amdgpu: IB test timed out.\n");
897                 r = -ETIMEDOUT;
898                 goto err2;
899         } else if (r < 0) {
900                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
901                 goto err2;
902         }
903         tmp = RREG32(scratch);
904         if (tmp == 0xDEADBEEF) {
905                 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
906                 r = 0;
907         } else {
908                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
909                           scratch, tmp);
910                 r = -EINVAL;
911         }
912 err2:
913         amdgpu_ib_free(adev, &ib, NULL);
914         dma_fence_put(f);
915 err1:
916         amdgpu_gfx_scratch_free(adev, scratch);
917         return r;
918 }
919
920
921 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
922 {
923         release_firmware(adev->gfx.pfp_fw);
924         adev->gfx.pfp_fw = NULL;
925         release_firmware(adev->gfx.me_fw);
926         adev->gfx.me_fw = NULL;
927         release_firmware(adev->gfx.ce_fw);
928         adev->gfx.ce_fw = NULL;
929         release_firmware(adev->gfx.rlc_fw);
930         adev->gfx.rlc_fw = NULL;
931         release_firmware(adev->gfx.mec_fw);
932         adev->gfx.mec_fw = NULL;
933         if ((adev->asic_type != CHIP_STONEY) &&
934             (adev->asic_type != CHIP_TOPAZ))
935                 release_firmware(adev->gfx.mec2_fw);
936         adev->gfx.mec2_fw = NULL;
937
938         kfree(adev->gfx.rlc.register_list_format);
939 }
940
941 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
942 {
943         const char *chip_name;
944         char fw_name[30];
945         int err;
946         struct amdgpu_firmware_info *info = NULL;
947         const struct common_firmware_header *header = NULL;
948         const struct gfx_firmware_header_v1_0 *cp_hdr;
949         const struct rlc_firmware_header_v2_0 *rlc_hdr;
950         unsigned int *tmp = NULL, i;
951
952         DRM_DEBUG("\n");
953
954         switch (adev->asic_type) {
955         case CHIP_TOPAZ:
956                 chip_name = "topaz";
957                 break;
958         case CHIP_TONGA:
959                 chip_name = "tonga";
960                 break;
961         case CHIP_CARRIZO:
962                 chip_name = "carrizo";
963                 break;
964         case CHIP_FIJI:
965                 chip_name = "fiji";
966                 break;
967         case CHIP_STONEY:
968                 chip_name = "stoney";
969                 break;
970         case CHIP_POLARIS10:
971                 chip_name = "polaris10";
972                 break;
973         case CHIP_POLARIS11:
974                 chip_name = "polaris11";
975                 break;
976         case CHIP_POLARIS12:
977                 chip_name = "polaris12";
978                 break;
979         case CHIP_VEGAM:
980                 chip_name = "vegam";
981                 break;
982         default:
983                 BUG();
984         }
985
986         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
987                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
988                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
989                 if (err == -ENOENT) {
990                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
991                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
992                 }
993         } else {
994                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
995                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
996         }
997         if (err)
998                 goto out;
999         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1000         if (err)
1001                 goto out;
1002         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1003         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1004         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1005
1006         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1007                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1008                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1009                 if (err == -ENOENT) {
1010                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1011                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1012                 }
1013         } else {
1014                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1015                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1016         }
1017         if (err)
1018                 goto out;
1019         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1020         if (err)
1021                 goto out;
1022         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1023         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1024
1025         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1026
1027         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1028                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1029                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1030                 if (err == -ENOENT) {
1031                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1032                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1033                 }
1034         } else {
1035                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1036                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1037         }
1038         if (err)
1039                 goto out;
1040         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1041         if (err)
1042                 goto out;
1043         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1044         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1045         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1046
1047         /*
1048          * Support for MCBP/Virtualization in combination with chained IBs is
1049          * formal released on feature version #46
1050          */
1051         if (adev->gfx.ce_feature_version >= 46 &&
1052             adev->gfx.pfp_feature_version >= 46) {
1053                 adev->virt.chained_ib_support = true;
1054                 DRM_INFO("Chained IB support enabled!\n");
1055         } else
1056                 adev->virt.chained_ib_support = false;
1057
1058         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1059         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1060         if (err)
1061                 goto out;
1062         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1063         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1064         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1065         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1066
1067         adev->gfx.rlc.save_and_restore_offset =
1068                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1069         adev->gfx.rlc.clear_state_descriptor_offset =
1070                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1071         adev->gfx.rlc.avail_scratch_ram_locations =
1072                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1073         adev->gfx.rlc.reg_restore_list_size =
1074                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1075         adev->gfx.rlc.reg_list_format_start =
1076                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1077         adev->gfx.rlc.reg_list_format_separate_start =
1078                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1079         adev->gfx.rlc.starting_offsets_start =
1080                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1081         adev->gfx.rlc.reg_list_format_size_bytes =
1082                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1083         adev->gfx.rlc.reg_list_size_bytes =
1084                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1085
1086         adev->gfx.rlc.register_list_format =
1087                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1088                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1089
1090         if (!adev->gfx.rlc.register_list_format) {
1091                 err = -ENOMEM;
1092                 goto out;
1093         }
1094
1095         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1096                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1097         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1098                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1099
1100         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1101
1102         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1103                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1104         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1105                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1106
1107         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1108                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1109                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1110                 if (err == -ENOENT) {
1111                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1112                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1113                 }
1114         } else {
1115                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1116                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1117         }
1118         if (err)
1119                 goto out;
1120         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1121         if (err)
1122                 goto out;
1123         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1124         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1125         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1126
1127         if ((adev->asic_type != CHIP_STONEY) &&
1128             (adev->asic_type != CHIP_TOPAZ)) {
1129                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1130                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1131                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1132                         if (err == -ENOENT) {
1133                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1134                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1135                         }
1136                 } else {
1137                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1138                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1139                 }
1140                 if (!err) {
1141                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1142                         if (err)
1143                                 goto out;
1144                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1145                                 adev->gfx.mec2_fw->data;
1146                         adev->gfx.mec2_fw_version =
1147                                 le32_to_cpu(cp_hdr->header.ucode_version);
1148                         adev->gfx.mec2_feature_version =
1149                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1150                 } else {
1151                         err = 0;
1152                         adev->gfx.mec2_fw = NULL;
1153                 }
1154         }
1155
1156         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1157                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1158                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1159                 info->fw = adev->gfx.pfp_fw;
1160                 header = (const struct common_firmware_header *)info->fw->data;
1161                 adev->firmware.fw_size +=
1162                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1163
1164                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1165                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1166                 info->fw = adev->gfx.me_fw;
1167                 header = (const struct common_firmware_header *)info->fw->data;
1168                 adev->firmware.fw_size +=
1169                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1170
1171                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1172                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1173                 info->fw = adev->gfx.ce_fw;
1174                 header = (const struct common_firmware_header *)info->fw->data;
1175                 adev->firmware.fw_size +=
1176                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1177
1178                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1179                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1180                 info->fw = adev->gfx.rlc_fw;
1181                 header = (const struct common_firmware_header *)info->fw->data;
1182                 adev->firmware.fw_size +=
1183                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1184
1185                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1186                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1187                 info->fw = adev->gfx.mec_fw;
1188                 header = (const struct common_firmware_header *)info->fw->data;
1189                 adev->firmware.fw_size +=
1190                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1191
1192                 /* we need account JT in */
1193                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1194                 adev->firmware.fw_size +=
1195                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1196
1197                 if (amdgpu_sriov_vf(adev)) {
1198                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1199                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1200                         info->fw = adev->gfx.mec_fw;
1201                         adev->firmware.fw_size +=
1202                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1203                 }
1204
1205                 if (adev->gfx.mec2_fw) {
1206                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1207                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1208                         info->fw = adev->gfx.mec2_fw;
1209                         header = (const struct common_firmware_header *)info->fw->data;
1210                         adev->firmware.fw_size +=
1211                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1212                 }
1213
1214         }
1215
1216 out:
1217         if (err) {
1218                 dev_err(adev->dev,
1219                         "gfx8: Failed to load firmware \"%s\"\n",
1220                         fw_name);
1221                 release_firmware(adev->gfx.pfp_fw);
1222                 adev->gfx.pfp_fw = NULL;
1223                 release_firmware(adev->gfx.me_fw);
1224                 adev->gfx.me_fw = NULL;
1225                 release_firmware(adev->gfx.ce_fw);
1226                 adev->gfx.ce_fw = NULL;
1227                 release_firmware(adev->gfx.rlc_fw);
1228                 adev->gfx.rlc_fw = NULL;
1229                 release_firmware(adev->gfx.mec_fw);
1230                 adev->gfx.mec_fw = NULL;
1231                 release_firmware(adev->gfx.mec2_fw);
1232                 adev->gfx.mec2_fw = NULL;
1233         }
1234         return err;
1235 }
1236
/**
 * gfx_v8_0_get_csb_buffer - build the clear-state buffer (CSB)
 * @adev: amdgpu device pointer
 * @buffer: destination buffer (little-endian dwords); silently does
 *          nothing if NULL or if no clear-state data is attached
 *
 * Emits a PM4 packet stream: PREAMBLE begin, CONTEXT_CONTROL, all
 * SECT_CONTEXT register extents from rlc.cs_data, the RB raster config
 * pair, PREAMBLE end, and a final CLEAR_STATE. Packet order is what the
 * CP expects — do not reorder. The buffer must be at least
 * gfx_v8_0_get_csb_size() dwords (not re-checked here).
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* NOTE(review): 0x80000000 appear to be load-enable control bits
	 * for CONTEXT_CONTROL — confirm against the PM4 packet spec. */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* copy every SECT_CONTEXT extent as a SET_CONTEXT_REG packet;
	 * any other section type ends emission early */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* program the harvested-RB raster configuration pair */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1283
/*
 * cz_init_cp_jump_table - copy the CP microcode jump tables into the RLC
 * cp_table buffer.
 *
 * For each microengine (me 0..max_me-1: CE, PFP, ME, MEC, and MEC2 on
 * Carrizo) the jump table is located inside the already-loaded firmware
 * image via the gfx_firmware_header_v1_0 jt_offset/jt_size fields and
 * copied, back to back, into adev->gfx.rlc.cp_table_ptr.
 *
 * Caller (gfx_v8_0_rlc_init) must have the cp_table BO reserved and
 * mapped, and all referenced firmwares loaded, before calling this.
 */
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Carrizo also has a MEC2 jump table */
	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			/* me 0 = CE */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			/* me 1 = PFP */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			/* me 2 = ME */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			/* me 3 = MEC */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else  if (me == 4) {
			/* me 4 = MEC2 (Carrizo only, see max_me above) */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		/* append this engine's jump table to the shared buffer */
		for (i = 0; i < table_size; i ++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
1348
/*
 * gfx_v8_0_rlc_fini - release the RLC buffer objects.
 *
 * Frees the clear-state BO and the CP jump-table BO allocated by
 * gfx_v8_0_rlc_init (the helper tolerates NULL/never-allocated objects).
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
1354
/*
 * gfx_v8_0_rlc_init - allocate and populate the RLC buffers.
 *
 * Creates the clear-state BO in VRAM, fills it with the CSB command
 * stream built by gfx_v8_0_get_csb_buffer(), and — on Carrizo/Stoney —
 * additionally creates the CP jump-table BO and fills it via
 * cz_init_cp_jump_table().  Each BO is created reserved+mapped, written,
 * then unmapped and unreserved.
 *
 * Returns 0 on success or a negative errno from BO creation.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		/* fill the BO with the CP jump tables while it is mapped */
		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}
1409
/* Free the MEC HPD/EOP buffer object allocated by gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1414
1415 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1416 {
1417         int r;
1418         u32 *hpd;
1419         size_t mec_hpd_size;
1420
1421         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1422
1423         /* take ownership of the relevant compute queues */
1424         amdgpu_gfx_compute_queue_acquire(adev);
1425
1426         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1427
1428         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1429                                       AMDGPU_GEM_DOMAIN_GTT,
1430                                       &adev->gfx.mec.hpd_eop_obj,
1431                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1432                                       (void **)&hpd);
1433         if (r) {
1434                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1435                 return r;
1436         }
1437
1438         memset(hpd, 0, mec_hpd_size);
1439
1440         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1441         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1442
1443         return 0;
1444 }
1445
/*
 * Raw GCN (VI) compute-shader machine code, dispatched by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize the VGPR file as part
 * of the Carrizo EDC workaround.  Dwords are opaque opcodes; do not edit
 * by hand without re-assembling.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1482
/*
 * Raw GCN (VI) compute-shader machine code, dispatched twice by
 * gfx_v8_0_do_edc_gpr_workarounds() (once per SE0 half, see
 * sgpr1_init_regs/sgpr2_init_regs) to initialize the SGPR file as part
 * of the Carrizo EDC workaround.  Opaque opcodes; do not edit by hand.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1507
/*
 * Register/value pairs written via PACKET3_SET_SH_REG before dispatching
 * vgpr_init_compute_shader (see gfx_v8_0_do_edc_gpr_workarounds()).
 * Stored as {reg, value, reg, value, ...}; iterated in steps of 2.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1528
/*
 * Register/value pairs for the first SGPR-init dispatch (CUs 0-3 of SE0,
 * per the 0x0f STATIC_THREAD_MGMT mask).  Same {reg, value} layout as
 * vgpr_init_regs; consumed by gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1549
/*
 * Register/value pairs for the second SGPR-init dispatch (CUs 4-7 of
 * SE0, per the 0xf0 STATIC_THREAD_MGMT mask).  Same {reg, value} layout
 * as sgpr1_init_regs; consumed by gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1570
/*
 * EDC SEC/DED error-counter registers.  Read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1599
/*
 * gfx_v8_0_do_edc_gpr_workarounds - prime the GPR files for EDC (Carrizo).
 *
 * Builds one indirect buffer containing three compute dispatches — one
 * VGPR-init shader and two SGPR-init shaders (one per SE0 CU half) —
 * submits it on compute ring 0, waits for completion, then enables EDC
 * DED/FED modes in GB_EDC_MODE / CC_GC_EDC_CONFIG and reads back the
 * SEC/DED counter registers to clear them.
 *
 * No-op (returns 0) on non-Carrizo ASICs or when the compute ring is not
 * ready.  Returns a negative errno on IB allocation/submit/fence failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save GB_EDC_MODE and disable EDC while the init shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords per {reg,value} pair, + 4 (PGM_LO/HI set)
	 * + 5 (DISPATCH_DIRECT) + 2 (CS partial flush), in bytes */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED halt and FED propagation */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1762
1763 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1764 {
1765         u32 gb_addr_config;
1766         u32 mc_shared_chmap, mc_arb_ramcfg;
1767         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1768         u32 tmp;
1769         int ret;
1770
1771         switch (adev->asic_type) {
1772         case CHIP_TOPAZ:
1773                 adev->gfx.config.max_shader_engines = 1;
1774                 adev->gfx.config.max_tile_pipes = 2;
1775                 adev->gfx.config.max_cu_per_sh = 6;
1776                 adev->gfx.config.max_sh_per_se = 1;
1777                 adev->gfx.config.max_backends_per_se = 2;
1778                 adev->gfx.config.max_texture_channel_caches = 2;
1779                 adev->gfx.config.max_gprs = 256;
1780                 adev->gfx.config.max_gs_threads = 32;
1781                 adev->gfx.config.max_hw_contexts = 8;
1782
1783                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1784                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1785                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1786                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1787                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1788                 break;
1789         case CHIP_FIJI:
1790                 adev->gfx.config.max_shader_engines = 4;
1791                 adev->gfx.config.max_tile_pipes = 16;
1792                 adev->gfx.config.max_cu_per_sh = 16;
1793                 adev->gfx.config.max_sh_per_se = 1;
1794                 adev->gfx.config.max_backends_per_se = 4;
1795                 adev->gfx.config.max_texture_channel_caches = 16;
1796                 adev->gfx.config.max_gprs = 256;
1797                 adev->gfx.config.max_gs_threads = 32;
1798                 adev->gfx.config.max_hw_contexts = 8;
1799
1800                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1801                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1802                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1803                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1804                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1805                 break;
1806         case CHIP_POLARIS11:
1807         case CHIP_POLARIS12:
1808                 ret = amdgpu_atombios_get_gfx_info(adev);
1809                 if (ret)
1810                         return ret;
1811                 adev->gfx.config.max_gprs = 256;
1812                 adev->gfx.config.max_gs_threads = 32;
1813                 adev->gfx.config.max_hw_contexts = 8;
1814
1815                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1816                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1817                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1818                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1819                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1820                 break;
1821         case CHIP_POLARIS10:
1822         case CHIP_VEGAM:
1823                 ret = amdgpu_atombios_get_gfx_info(adev);
1824                 if (ret)
1825                         return ret;
1826                 adev->gfx.config.max_gprs = 256;
1827                 adev->gfx.config.max_gs_threads = 32;
1828                 adev->gfx.config.max_hw_contexts = 8;
1829
1830                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1831                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1832                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1833                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1834                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1835                 break;
1836         case CHIP_TONGA:
1837                 adev->gfx.config.max_shader_engines = 4;
1838                 adev->gfx.config.max_tile_pipes = 8;
1839                 adev->gfx.config.max_cu_per_sh = 8;
1840                 adev->gfx.config.max_sh_per_se = 1;
1841                 adev->gfx.config.max_backends_per_se = 2;
1842                 adev->gfx.config.max_texture_channel_caches = 8;
1843                 adev->gfx.config.max_gprs = 256;
1844                 adev->gfx.config.max_gs_threads = 32;
1845                 adev->gfx.config.max_hw_contexts = 8;
1846
1847                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1848                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1849                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1850                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1851                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1852                 break;
1853         case CHIP_CARRIZO:
1854                 adev->gfx.config.max_shader_engines = 1;
1855                 adev->gfx.config.max_tile_pipes = 2;
1856                 adev->gfx.config.max_sh_per_se = 1;
1857                 adev->gfx.config.max_backends_per_se = 2;
1858                 adev->gfx.config.max_cu_per_sh = 8;
1859                 adev->gfx.config.max_texture_channel_caches = 2;
1860                 adev->gfx.config.max_gprs = 256;
1861                 adev->gfx.config.max_gs_threads = 32;
1862                 adev->gfx.config.max_hw_contexts = 8;
1863
1864                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1865                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1866                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1867                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1868                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1869                 break;
1870         case CHIP_STONEY:
1871                 adev->gfx.config.max_shader_engines = 1;
1872                 adev->gfx.config.max_tile_pipes = 2;
1873                 adev->gfx.config.max_sh_per_se = 1;
1874                 adev->gfx.config.max_backends_per_se = 1;
1875                 adev->gfx.config.max_cu_per_sh = 3;
1876                 adev->gfx.config.max_texture_channel_caches = 2;
1877                 adev->gfx.config.max_gprs = 256;
1878                 adev->gfx.config.max_gs_threads = 16;
1879                 adev->gfx.config.max_hw_contexts = 8;
1880
1881                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1882                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1883                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1884                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1885                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1886                 break;
1887         default:
1888                 adev->gfx.config.max_shader_engines = 2;
1889                 adev->gfx.config.max_tile_pipes = 4;
1890                 adev->gfx.config.max_cu_per_sh = 2;
1891                 adev->gfx.config.max_sh_per_se = 1;
1892                 adev->gfx.config.max_backends_per_se = 2;
1893                 adev->gfx.config.max_texture_channel_caches = 4;
1894                 adev->gfx.config.max_gprs = 256;
1895                 adev->gfx.config.max_gs_threads = 32;
1896                 adev->gfx.config.max_hw_contexts = 8;
1897
1898                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1899                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1900                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1901                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1902                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1903                 break;
1904         }
1905
1906         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1907         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1908         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1909
1910         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1911         adev->gfx.config.mem_max_burst_length_bytes = 256;
1912         if (adev->flags & AMD_IS_APU) {
1913                 /* Get memory bank mapping mode. */
1914                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1915                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1916                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1917
1918                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1919                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1920                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1921
1922                 /* Validate settings in case only one DIMM installed. */
1923                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1924                         dimm00_addr_map = 0;
1925                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1926                         dimm01_addr_map = 0;
1927                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1928                         dimm10_addr_map = 0;
1929                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1930                         dimm11_addr_map = 0;
1931
1932                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1933                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1934                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1935                         adev->gfx.config.mem_row_size_in_kb = 2;
1936                 else
1937                         adev->gfx.config.mem_row_size_in_kb = 1;
1938         } else {
1939                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1940                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1941                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1942                         adev->gfx.config.mem_row_size_in_kb = 4;
1943         }
1944
1945         adev->gfx.config.shader_engine_tile_size = 32;
1946         adev->gfx.config.num_gpus = 1;
1947         adev->gfx.config.multi_gpu_tile_size = 64;
1948
1949         /* fix up row size */
1950         switch (adev->gfx.config.mem_row_size_in_kb) {
1951         case 1:
1952         default:
1953                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1954                 break;
1955         case 2:
1956                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1957                 break;
1958         case 4:
1959                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1960                 break;
1961         }
1962         adev->gfx.config.gb_addr_config = gb_addr_config;
1963
1964         return 0;
1965 }
1966
1967 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1968                                         int mec, int pipe, int queue)
1969 {
1970         int r;
1971         unsigned irq_type;
1972         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1973
1974         ring = &adev->gfx.compute_ring[ring_id];
1975
1976         /* mec0 is me1 */
1977         ring->me = mec + 1;
1978         ring->pipe = pipe;
1979         ring->queue = queue;
1980
1981         ring->ring_obj = NULL;
1982         ring->use_doorbell = true;
1983         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1984         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1985                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1986         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1987
1988         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1989                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1990                 + ring->pipe;
1991
1992         /* type-2 packets are deprecated on MEC, use type-3 instead */
1993         r = amdgpu_ring_init(adev, ring, 1024,
1994                         &adev->gfx.eop_irq, irq_type);
1995         if (r)
1996                 return r;
1997
1998
1999         return 0;
2000 }
2001
/*
 * gfx_v8_0_sw_init - software-side setup for the GFX v8 IP block
 * @handle: opaque IP-block handle, actually a struct amdgpu_device *
 *
 * Registers the GFX interrupt sources, loads firmware, allocates the
 * RLC/MEC/KIQ objects, creates the GFX and compute rings, and reserves the
 * GDS/GWS/OA buffer objects.  The order of these steps matters: interrupt
 * sources must exist before rings that reference them, and MEC BOs must
 * exist before compute rings that point into the EOP buffer.
 *
 * Returns 0 on success or a negative error code from the first failing step.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* Number of MEC blocks is per-ASIC; everything else below is common. */
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event (IH source id 178) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event (IH source id 181) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg (IH source id 184) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst (IH source id 185) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* MEC BOs must exist before compute rings reference hpd_eop_gpu_addr */
	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	/* (queue loop outside pipe loop, so ring_ids spread over pipes first) */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	/* constant-engine RAM size is fixed at 32KB on gfx8 */
	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2153
/*
 * gfx_v8_0_sw_fini - software-side teardown for the GFX v8 IP block
 * @handle: opaque IP-block handle, actually a struct amdgpu_device *
 *
 * Releases everything gfx_v8_0_sw_init() created, in roughly reverse
 * order of allocation: GDS/GWS/OA BOs, rings, MQDs, KIQ, MEC and RLC
 * objects, the clear-state (and, where present, CP table) BOs, and
 * finally the firmware images.  Always returns 0.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/* only Carrizo/Stoney allocate a CP table BO, so only they free it */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2187
2188 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2189 {
2190         uint32_t *modearray, *mod2array;
2191         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2192         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2193         u32 reg_offset;
2194
2195         modearray = adev->gfx.config.tile_mode_array;
2196         mod2array = adev->gfx.config.macrotile_mode_array;
2197
2198         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2199                 modearray[reg_offset] = 0;
2200
2201         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2202                 mod2array[reg_offset] = 0;
2203
2204         switch (adev->asic_type) {
2205         case CHIP_TOPAZ:
2206                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2207                                 PIPE_CONFIG(ADDR_SURF_P2) |
2208                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2209                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2210                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2211                                 PIPE_CONFIG(ADDR_SURF_P2) |
2212                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2213                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2214                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2215                                 PIPE_CONFIG(ADDR_SURF_P2) |
2216                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2217                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2218                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219                                 PIPE_CONFIG(ADDR_SURF_P2) |
2220                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2221                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2222                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2223                                 PIPE_CONFIG(ADDR_SURF_P2) |
2224                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2225                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2226                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2227                                 PIPE_CONFIG(ADDR_SURF_P2) |
2228                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2229                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2230                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2231                                 PIPE_CONFIG(ADDR_SURF_P2) |
2232                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2233                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2234                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2235                                 PIPE_CONFIG(ADDR_SURF_P2));
2236                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2237                                 PIPE_CONFIG(ADDR_SURF_P2) |
2238                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2239                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2240                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2241                                  PIPE_CONFIG(ADDR_SURF_P2) |
2242                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2243                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2244                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2245                                  PIPE_CONFIG(ADDR_SURF_P2) |
2246                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2247                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2248                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249                                  PIPE_CONFIG(ADDR_SURF_P2) |
2250                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2251                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2252                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2253                                  PIPE_CONFIG(ADDR_SURF_P2) |
2254                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2255                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2256                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2257                                  PIPE_CONFIG(ADDR_SURF_P2) |
2258                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2259                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2260                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2261                                  PIPE_CONFIG(ADDR_SURF_P2) |
2262                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2263                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2264                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2265                                  PIPE_CONFIG(ADDR_SURF_P2) |
2266                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2267                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2268                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2269                                  PIPE_CONFIG(ADDR_SURF_P2) |
2270                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2271                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2272                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2273                                  PIPE_CONFIG(ADDR_SURF_P2) |
2274                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2275                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2276                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2277                                  PIPE_CONFIG(ADDR_SURF_P2) |
2278                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2279                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2280                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2281                                  PIPE_CONFIG(ADDR_SURF_P2) |
2282                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2283                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2284                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2285                                  PIPE_CONFIG(ADDR_SURF_P2) |
2286                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2287                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2288                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2289                                  PIPE_CONFIG(ADDR_SURF_P2) |
2290                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2291                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2292                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2293                                  PIPE_CONFIG(ADDR_SURF_P2) |
2294                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2295                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2296                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2297                                  PIPE_CONFIG(ADDR_SURF_P2) |
2298                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2299                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2300                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2301                                  PIPE_CONFIG(ADDR_SURF_P2) |
2302                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2303                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2304                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2305                                  PIPE_CONFIG(ADDR_SURF_P2) |
2306                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2307                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2308
2309                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2310                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2311                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2312                                 NUM_BANKS(ADDR_SURF_8_BANK));
2313                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2314                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2315                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2316                                 NUM_BANKS(ADDR_SURF_8_BANK));
2317                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2318                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2319                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2320                                 NUM_BANKS(ADDR_SURF_8_BANK));
2321                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2323                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2324                                 NUM_BANKS(ADDR_SURF_8_BANK));
2325                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2326                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2327                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2328                                 NUM_BANKS(ADDR_SURF_8_BANK));
2329                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2330                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2331                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2332                                 NUM_BANKS(ADDR_SURF_8_BANK));
2333                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2334                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2335                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2336                                 NUM_BANKS(ADDR_SURF_8_BANK));
2337                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2338                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2339                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2340                                 NUM_BANKS(ADDR_SURF_16_BANK));
2341                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2342                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2343                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2344                                 NUM_BANKS(ADDR_SURF_16_BANK));
2345                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2346                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2347                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2348                                  NUM_BANKS(ADDR_SURF_16_BANK));
2349                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2350                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2352                                  NUM_BANKS(ADDR_SURF_16_BANK));
2353                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2354                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2355                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2356                                  NUM_BANKS(ADDR_SURF_16_BANK));
2357                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2359                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2360                                  NUM_BANKS(ADDR_SURF_16_BANK));
2361                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364                                  NUM_BANKS(ADDR_SURF_8_BANK));
2365
2366                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2367                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2368                             reg_offset != 23)
2369                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2370
2371                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2372                         if (reg_offset != 7)
2373                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2374
2375                 break;
2376         case CHIP_FIJI:
2377         case CHIP_VEGAM:
2378                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2381                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2382                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2385                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2386                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2389                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2390                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2391                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2393                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2394                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2395                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2397                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2398                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2399                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2401                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2402                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2403                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2405                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2406                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2407                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2408                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2409                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2410                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2411                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2412                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2415                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2421                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2424                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2426                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2428                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2429                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2435                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2437                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2439                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2442                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2443                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2444                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2445                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2446                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2447                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2448                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2449                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2451                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2452                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2453                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2455                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2456                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2457                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2459                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2460                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2461                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2463                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/*
		 * Tail of the per-ASIC tiling table for the case label above
		 * this hunk.  Pipe config ADDR_SURF_P16_32x32_16x16 (16 pipes)
		 * — NOTE(review): presumably a Fiji-class part; confirm against
		 * the case label, which is outside this view.
		 *
		 * modearray[22..26]: thick/xthick 3D volume-texture modes.
		 * modearray[27..30]: rotated-display modes (the P4_16x16 entry
		 * in [23]/[30] is the reduced-pipe PRT variant).
		 */
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macrotile (GB_MACROTILE_MODE*) bank geometry.  mod2array[7]
		 * is deliberately never written, and the write loop below skips
		 * register offset 7 to match.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Flush the tables to the hardware registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Offset 7 is skipped: mod2array[7] was never populated above. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
		/*
		 * Tonga: 8-pipe (ADDR_SURF_P8_32x32_16x16) tiling configuration.
		 *
		 * modearray[0..7]:  depth micro-tiling modes with explicit tile
		 *                   splits from 64B up to 2KB.
		 * modearray[8]:     linear-aligned.
		 * modearray[9..30]: display / thin / thick / rotated color modes;
		 *                   entries using ADDR_SURF_P4_16x16 are the
		 *                   reduced-pipe PRT variants.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macrotile (GB_MACROTILE_MODE*) bank geometry for Tonga.
		 * mod2array[7] is deliberately never written; the write loop
		 * below skips register offset 7 to match.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Flush the tables to the hardware registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Offset 7 is skipped: mod2array[7] was never populated above. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/*
		 * Polaris11/12: 4-pipe (ADDR_SURF_P4_16x16) tiling configuration.
		 * Same table layout as the Tonga case: [0..7] depth modes with
		 * explicit tile splits, [8] linear, [9..30] display/thin/thick/
		 * rotated color modes.  (Case continues past this hunk with the
		 * register write loops and break.)
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macrotile (GB_MACROTILE_MODE*) bank geometry for Polaris11/12.
		 * mod2array[7] is deliberately never written; the write loop
		 * (below, outside this hunk) skips register offset 7 to match.
		 * Note [8] and [9] use BANK_WIDTH_2, unlike the other entries.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Flush the tile-mode table to the hardware registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2952
2953                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2954                         if (reg_offset != 7)
2955                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2956
2957                 break;
2958         case CHIP_POLARIS10:
                /*
                 * Polaris10 tiling tables (8-pipe part, P8_32x32_16x16 pipe
                 * config).  modearray[] entries are written to the
                 * GB_TILE_MODE* registers and mod2array[] entries to the
                 * GB_MACROTILE_MODE* registers by the loops at the end of
                 * this case.
                 */
2959                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2960                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2962                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2963                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2964                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2966                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2967                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2968                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2969                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2970                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2971                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2972                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2973                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2974                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2975                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2976                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2977                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2978                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2979                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2980                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2982                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2983                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2984                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2986                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2987                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2988                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2989                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2990                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2991                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2992                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2993                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2994                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2996                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3000                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3001                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3004                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3005                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3006                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3007                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3008                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3009                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3010                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3012                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3013                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3014                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3016                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3018                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3020                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3021                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3023                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3024                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3025                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3026                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3027                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3028                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3029                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3030                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3031                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3032                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3033                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3034                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3035                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3036                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3037                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3038                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3039                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3040                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3041                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3042                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3043                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3044                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3045                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3046                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3047                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3048                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3049                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3050                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3051                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3052                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3053                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3054                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3055                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3056                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3057                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3058                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3059                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3060                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3061                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3062                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3063                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3064                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3065                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3066                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3067                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3068                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3070                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3071                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3072                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3073                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3074                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3075                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3076                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3077                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3078                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3079                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3080                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3081
                /*
                 * Macrotile (bank) settings.  mod2array[7] is intentionally
                 * never initialized; the write loop below skips offset 7.
                 */
3082                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3083                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3084                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3085                                 NUM_BANKS(ADDR_SURF_16_BANK));
3086
3087                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3089                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3090                                 NUM_BANKS(ADDR_SURF_16_BANK));
3091
3092                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3093                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3094                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3095                                 NUM_BANKS(ADDR_SURF_16_BANK));
3096
3097                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3098                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3099                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3100                                 NUM_BANKS(ADDR_SURF_16_BANK));
3101
3102                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3103                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3104                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3105                                 NUM_BANKS(ADDR_SURF_16_BANK));
3106
3107                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3108                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3109                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3110                                 NUM_BANKS(ADDR_SURF_16_BANK));
3111
3112                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3113                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3114                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3115                                 NUM_BANKS(ADDR_SURF_16_BANK));
3116
3117                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3119                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3120                                 NUM_BANKS(ADDR_SURF_16_BANK));
3121
3122                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3123                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3124                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3125                                 NUM_BANKS(ADDR_SURF_16_BANK));
3126
3127                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3128                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3129                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3130                                 NUM_BANKS(ADDR_SURF_16_BANK));
3131
3132                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3133                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3134                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3135                                 NUM_BANKS(ADDR_SURF_16_BANK));
3136
3137                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3138                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3139                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3140                                 NUM_BANKS(ADDR_SURF_8_BANK));
3141
3142                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3143                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3144                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3145                                 NUM_BANKS(ADDR_SURF_4_BANK));
3146
3147                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3150                                 NUM_BANKS(ADDR_SURF_4_BANK));
3151
                /* flush both tables to the hardware registers */
3152                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3153                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3154
                /* macrotile mode 7 is not programmed (slot never assigned) */
3155                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3156                         if (reg_offset != 7)
3157                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3158
3159                 break;
3160         case CHIP_STONEY:
                /*
                 * Stoney tiling tables (2-pipe part, P2 pipe config).
                 * modearray[] entries go to GB_TILE_MODE* and mod2array[]
                 * entries to GB_MACROTILE_MODE*.  Slots 7, 12, 17 and 23 of
                 * modearray[] and slot 7 of mod2array[] are intentionally
                 * never assigned, and the write loops below skip exactly
                 * those offsets.
                 */
3161                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3162                                 PIPE_CONFIG(ADDR_SURF_P2) |
3163                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3164                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3165                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3166                                 PIPE_CONFIG(ADDR_SURF_P2) |
3167                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3168                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3169                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3170                                 PIPE_CONFIG(ADDR_SURF_P2) |
3171                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3172                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3173                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3174                                 PIPE_CONFIG(ADDR_SURF_P2) |
3175                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3176                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3177                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3178                                 PIPE_CONFIG(ADDR_SURF_P2) |
3179                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3180                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3181                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3182                                 PIPE_CONFIG(ADDR_SURF_P2) |
3183                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3184                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3185                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3186                                 PIPE_CONFIG(ADDR_SURF_P2) |
3187                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3188                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3189                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3190                                 PIPE_CONFIG(ADDR_SURF_P2));
3191                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3192                                 PIPE_CONFIG(ADDR_SURF_P2) |
3193                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3194                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3195                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3196                                  PIPE_CONFIG(ADDR_SURF_P2) |
3197                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3198                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3199                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3200                                  PIPE_CONFIG(ADDR_SURF_P2) |
3201                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3202                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3203                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3204                                  PIPE_CONFIG(ADDR_SURF_P2) |
3205                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3206                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3207                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3208                                  PIPE_CONFIG(ADDR_SURF_P2) |
3209                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3210                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3211                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3212                                  PIPE_CONFIG(ADDR_SURF_P2) |
3213                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3214                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3215                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3216                                  PIPE_CONFIG(ADDR_SURF_P2) |
3217                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3218                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3219                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3220                                  PIPE_CONFIG(ADDR_SURF_P2) |
3221                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3222                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3223                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3224                                  PIPE_CONFIG(ADDR_SURF_P2) |
3225                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3226                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3227                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3228                                  PIPE_CONFIG(ADDR_SURF_P2) |
3229                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3230                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3231                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3232                                  PIPE_CONFIG(ADDR_SURF_P2) |
3233                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3234                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3235                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3236                                  PIPE_CONFIG(ADDR_SURF_P2) |
3237                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3238                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3239                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3240                                  PIPE_CONFIG(ADDR_SURF_P2) |
3241                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3242                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3243                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3244                                  PIPE_CONFIG(ADDR_SURF_P2) |
3245                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3246                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3247                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3248                                  PIPE_CONFIG(ADDR_SURF_P2) |
3249                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3250                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3251                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3252                                  PIPE_CONFIG(ADDR_SURF_P2) |
3253                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3254                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3255                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3256                                  PIPE_CONFIG(ADDR_SURF_P2) |
3257                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3258                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3259                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3260                                  PIPE_CONFIG(ADDR_SURF_P2) |
3261                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3262                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3263
                /* macrotile (bank) settings; slot 7 deliberately unset */
3264                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3265                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3266                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3267                                 NUM_BANKS(ADDR_SURF_8_BANK));
3268                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3269                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3270                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3271                                 NUM_BANKS(ADDR_SURF_8_BANK));
3272                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3273                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3274                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3275                                 NUM_BANKS(ADDR_SURF_8_BANK));
3276                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3277                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3278                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3279                                 NUM_BANKS(ADDR_SURF_8_BANK));
3280                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3281                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3282                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3283                                 NUM_BANKS(ADDR_SURF_8_BANK));
3284                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3285                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3286                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3287                                 NUM_BANKS(ADDR_SURF_8_BANK));
3288                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3289                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3290                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3291                                 NUM_BANKS(ADDR_SURF_8_BANK));
3292                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3293                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3294                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3295                                 NUM_BANKS(ADDR_SURF_16_BANK));
3296                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3297                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3298                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3299                                 NUM_BANKS(ADDR_SURF_16_BANK));
3300                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3301                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3302                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303                                  NUM_BANKS(ADDR_SURF_16_BANK));
3304                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3306                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307                                  NUM_BANKS(ADDR_SURF_16_BANK));
3308                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3309                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3310                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3311                                  NUM_BANKS(ADDR_SURF_16_BANK));
3312                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3313                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3314                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3315                                  NUM_BANKS(ADDR_SURF_16_BANK));
3316                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3318                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3319                                  NUM_BANKS(ADDR_SURF_8_BANK));
3320
                /* flush to hardware, skipping the unprogrammed slots */
3321                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3322                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3323                             reg_offset != 23)
3324                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3325
3326                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3327                         if (reg_offset != 7)
3328                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3329
3330                 break;
3331         default:
                /*
                 * Unrecognized ASIC: warn, then deliberately fall through to
                 * the CHIP_CARRIZO tables below as a safe default (the
                 * warning text documents this).
                 */
3332                 dev_warn(adev->dev,
3333                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3334                          adev->asic_type);
                /* fall through */
3335
3336         case CHIP_CARRIZO:
3337                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3338                                 PIPE_CONFIG(ADDR_SURF_P2) |
3339                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3340                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3341                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3342                                 PIPE_CONFIG(ADDR_SURF_P2) |
3343                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3344                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3345                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3346                                 PIPE_CONFIG(ADDR_SURF_P2) |
3347                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3348                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3349                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3350                                 PIPE_CONFIG(ADDR_SURF_P2) |
3351                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3352                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3353                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3354                                 PIPE_CONFIG(ADDR_SURF_P2) |
3355                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3356                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3357                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3358                                 PIPE_CONFIG(ADDR_SURF_P2) |
3359                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3360                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3361                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3362                                 PIPE_CONFIG(ADDR_SURF_P2) |
3363                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3364                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3365                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3366                                 PIPE_CONFIG(ADDR_SURF_P2));
3367                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3368                                 PIPE_CONFIG(ADDR_SURF_P2) |
3369                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3370                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3371                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3372                                  PIPE_CONFIG(ADDR_SURF_P2) |
3373                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3374                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3375                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3376                                  PIPE_CONFIG(ADDR_SURF_P2) |
3377                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3378                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3379                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3380                                  PIPE_CONFIG(ADDR_SURF_P2) |
3381                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3382                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3383                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3384                                  PIPE_CONFIG(ADDR_SURF_P2) |
3385                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3386                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3387                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3388                                  PIPE_CONFIG(ADDR_SURF_P2) |
3389                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3390                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3391                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3392                                  PIPE_CONFIG(ADDR_SURF_P2) |
3393                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3394                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3395                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3396                                  PIPE_CONFIG(ADDR_SURF_P2) |
3397                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3398                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3399                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3400                                  PIPE_CONFIG(ADDR_SURF_P2) |
3401                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3402                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3403                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3404                                  PIPE_CONFIG(ADDR_SURF_P2) |
3405                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3406                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3407                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3408                                  PIPE_CONFIG(ADDR_SURF_P2) |
3409                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3410                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3411                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3412                                  PIPE_CONFIG(ADDR_SURF_P2) |
3413                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3414                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3415                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3416                                  PIPE_CONFIG(ADDR_SURF_P2) |
3417                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3418                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3419                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3420                                  PIPE_CONFIG(ADDR_SURF_P2) |
3421                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3422                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3423                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3424                                  PIPE_CONFIG(ADDR_SURF_P2) |
3425                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3426                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3427                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3428                                  PIPE_CONFIG(ADDR_SURF_P2) |
3429                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3430                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3431                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3432                                  PIPE_CONFIG(ADDR_SURF_P2) |
3433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3435                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3436                                  PIPE_CONFIG(ADDR_SURF_P2) |
3437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3439
3440                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3441                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3442                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3443                                 NUM_BANKS(ADDR_SURF_8_BANK));
3444                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3445                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3446                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3447                                 NUM_BANKS(ADDR_SURF_8_BANK));
3448                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3449                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3450                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3451                                 NUM_BANKS(ADDR_SURF_8_BANK));
3452                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3453                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3454                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3455                                 NUM_BANKS(ADDR_SURF_8_BANK));
3456                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3457                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3458                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3459                                 NUM_BANKS(ADDR_SURF_8_BANK));
3460                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3461                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3462                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3463                                 NUM_BANKS(ADDR_SURF_8_BANK));
3464                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3465                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3466                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3467                                 NUM_BANKS(ADDR_SURF_8_BANK));
3468                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3469                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3470                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3471                                 NUM_BANKS(ADDR_SURF_16_BANK));
3472                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3473                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3474                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3475                                 NUM_BANKS(ADDR_SURF_16_BANK));
3476                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3477                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3478                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3479                                  NUM_BANKS(ADDR_SURF_16_BANK));
3480                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3481                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3482                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3483                                  NUM_BANKS(ADDR_SURF_16_BANK));
3484                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3485                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3486                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3487                                  NUM_BANKS(ADDR_SURF_16_BANK));
3488                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3489                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3490                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3491                                  NUM_BANKS(ADDR_SURF_16_BANK));
3492                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3493                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3494                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3495                                  NUM_BANKS(ADDR_SURF_8_BANK));
3496
3497                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3498                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3499                             reg_offset != 23)
3500                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3501
3502                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3503                         if (reg_offset != 7)
3504                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3505
3506                 break;
3507         }
3508 }
3509
3510 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3511                                   u32 se_num, u32 sh_num, u32 instance)
3512 {
3513         u32 data;
3514
3515         if (instance == 0xffffffff)
3516                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3517         else
3518                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3519
3520         if (se_num == 0xffffffff)
3521                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3522         else
3523                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3524
3525         if (sh_num == 0xffffffff)
3526                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3527         else
3528                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3529
3530         WREG32(mmGRBM_GFX_INDEX, data);
3531 }
3532
/*
 * Select the ME/pipe/queue combination that subsequent per-queue
 * register accesses are steered to, via the SRBM (VMID forced to 0).
 */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q)
{
	vi_srbm_select(adev, me, pipe, q, 0);
}
3538
3539 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3540 {
3541         u32 data, mask;
3542
3543         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3544                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3545
3546         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3547
3548         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3549                                          adev->gfx.config.max_sh_per_se);
3550
3551         return (~data) & mask;
3552 }
3553
/*
 * gfx_v8_0_raster_config - default raster configuration per ASIC
 *
 * @adev: amdgpu_device pointer
 * @rconf: PA_SC_RASTER_CONFIG bits are OR'ed into this value
 * @rconf1: PA_SC_RASTER_CONFIG_1 bits are OR'ed into this value
 *
 * Provides the default (non-harvested) raster configuration used by
 * gfx_v8_0_setup_rb().  The values are fixed per-ASIC tables.
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_VEGAM:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		/* explicitly keep the reset default of 0 */
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3594
/*
 * gfx_v8_0_write_harvested_raster_configs - program raster config on a
 * part with harvested (disabled) render backends
 *
 * @adev: amdgpu_device pointer
 * @raster_config: default PA_SC_RASTER_CONFIG value
 * @raster_config_1: default PA_SC_RASTER_CONFIG_1 value
 * @rb_mask: bitmap of the render backends that are actually enabled
 * @num_rb: total number of render backend slots (enabled or not)
 *
 * Rewrites the SE/PKR/RB mapping fields so rasterizer work is routed
 * only to RBs present in @rb_mask, then writes the adjusted config to
 * each shader engine via GRBM_GFX_INDEX steering.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* carve rb_mask into one contiguous slice of RB bits per SE */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if one SE pair is fully harvested, re-point SE_PAIR_MAP at the
	 * surviving pair */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* if one SE of a pair has no enabled RBs, map work to the
		 * other SE of that pair */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		/* likewise re-map around a fully harvested packer */
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* steer around a harvested RB inside packer 0 */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				/* and inside packer 1, when it exists */
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3703
/*
 * gfx_v8_0_setup_rb - detect active render backends and program the
 * raster configuration
 *
 * @adev: amdgpu_device pointer
 *
 * Walks every SE/SH to collect the active RB bitmap, then writes either
 * the default raster config (nothing enabled, or all RB pipes present)
 * or the harvest-adjusted config.  Finally caches the per-SE/SH register
 * values in adev->gfx.config for later queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			/* pack each SH's bitmap into its slot of active_rbs */
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* back to broadcast mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		/* no harvesting to work around: write the defaults broadcast */
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3760
/**
 * gfx_v8_0_init_compute_vmid - init compute vmid SH_MEM registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM configuration and aperture base registers for
 * the VMIDs reserved for compute.
 *
 */
3769 #define DEFAULT_SH_MEM_BASES    (0x6000)
3770 #define FIRST_COMPUTE_VMID      (8)
3771 #define LAST_COMPUTE_VMID       (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* HSA64 addressing, unaligned alignment mode, MTYPE_CC default,
	 * private apertures handled by the ATC */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* apply the same setup to every compute-reserved VMID */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base > limit — presumably leaves APE1 unused; confirm
		 * against the register spec */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3805
3806 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3807 {
3808         switch (adev->asic_type) {
3809         default:
3810                 adev->gfx.config.double_offchip_lds_buf = 1;
3811                 break;
3812         case CHIP_CARRIZO:
3813         case CHIP_STONEY:
3814                 adev->gfx.config.double_offchip_lds_buf = 0;
3815                 break;
3816         }
3817 }
3818
/*
 * gfx_v8_0_gpu_init - GFX hardware initialization
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the address configs and tiling tables, sets up the render
 * backends, reads CU info, programs SH_MEM registers for every VMID
 * (plus the compute VMIDs), and finally writes the broadcast PA_SC
 * FIFO sizes and SPI arbitration priorities.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: uncached default MTYPE, SH_MEM_BASES = 0 */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* other VMIDs: noncached default MTYPE, bases taken
			 * from the shared aperture start */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3900
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes masters to idle
 *
 * @adev: amdgpu_device pointer
 *
 * Polls RLC_SERDES_CU_MASTER_BUSY for every SE/SH and then the non-CU
 * master busy bits, giving up after adev->usec_timeout iterations (with
 * a DRM_INFO message in the per-CU case).
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* restore broadcast mode before bailing out */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3938
3939 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3940                                                bool enable)
3941 {
3942         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3943
3944         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3945         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3946         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3947         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3948
3949         WREG32(mmCP_INT_CNTL_RING0, tmp);
3950 }
3951
3952 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3953 {
3954         /* csib */
3955         WREG32(mmRLC_CSIB_ADDR_HI,
3956                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3957         WREG32(mmRLC_CSIB_ADDR_LO,
3958                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3959         WREG32(mmRLC_CSIB_LENGTH,
3960                         adev->gfx.rlc.clear_state_size);
3961 }
3962
3963 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3964                                 int ind_offset,
3965                                 int list_size,
3966                                 int *unique_indices,
3967                                 int *indices_count,
3968                                 int max_indices,
3969                                 int *ind_start_offsets,
3970                                 int *offset_count,
3971                                 int max_offset)
3972 {
3973         int indices;
3974         bool new_entry = true;
3975
3976         for (; ind_offset < list_size; ind_offset++) {
3977
3978                 if (new_entry) {
3979                         new_entry = false;
3980                         ind_start_offsets[*offset_count] = ind_offset;
3981                         *offset_count = *offset_count + 1;
3982                         BUG_ON(*offset_count >= max_offset);
3983                 }
3984
3985                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3986                         new_entry = true;
3987                         continue;
3988                 }
3989
3990                 ind_offset += 2;
3991
3992                 /* look for the matching indice */
3993                 for (indices = 0;
3994                         indices < *indices_count;
3995                         indices++) {
3996                         if (unique_indices[indices] ==
3997                                 register_list_format[ind_offset])
3998                                 break;
3999                 }
4000
4001                 if (indices >= *indices_count) {
4002                         unique_indices[*indices_count] =
4003                                 register_list_format[ind_offset];
4004                         indices = *indices_count;
4005                         *indices_count = *indices_count + 1;
4006                         BUG_ON(*indices_count >= max_indices);
4007                 }
4008
4009                 register_list_format[ind_offset] = indices;
4010         }
4011 }
4012
/*
 * Program the RLC save/restore machine (SRM) from the firmware-supplied
 * lists: the direct register-restore list goes into SRM ARAM, the
 * pre-parsed indirect list into GPM scratch, followed by the starting
 * offsets and the per-index control registers.
 *
 * Returns 0 on success or -ENOMEM if the temporary list copy fails.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: parsing rewrites the index dwords in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* restore-list size is written as the number of dword pairs */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			/* NOTE(review): mask/shift split (& 0x3FFFF vs >> 20)
			 * is asymmetric - presumably matches the register
			 * field layout; confirm against the register spec. */
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4076
/* Turn on the RLC save/restore machine (SRM). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4081
/*
 * Program static power-gating timing parameters: write-pointer polling
 * interval, the RLC power up/down/propagate/mem-sleep delays, the SERDES
 * command delay and the GFX-idle threshold for automatic power gating.
 * The numeric values are hardware tuning constants.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4098
/* Enable/disable SMU clock slow-down during power up (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4104
/* Enable/disable SMU clock slow-down during power down (CZ/ST). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4110
/* Enable/disable CP power gating; note the field is a *disable* bit,
 * so the polarity is inverted (enable -> clear CP_PG_DISABLE). */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4115
4116 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4117 {
4118         if ((adev->asic_type == CHIP_CARRIZO) ||
4119             (adev->asic_type == CHIP_STONEY)) {
4120                 gfx_v8_0_init_csb(adev);
4121                 gfx_v8_0_init_save_restore_list(adev);
4122                 gfx_v8_0_enable_save_restore_machine(adev);
4123                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4124                 gfx_v8_0_init_power_gating(adev);
4125                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4126         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4127                    (adev->asic_type == CHIP_POLARIS12) ||
4128                    (adev->asic_type == CHIP_VEGAM)) {
4129                 gfx_v8_0_init_csb(adev);
4130                 gfx_v8_0_init_save_restore_list(adev);
4131                 gfx_v8_0_enable_save_restore_machine(adev);
4132                 gfx_v8_0_init_power_gating(adev);
4133         }
4134
4135 }
4136
/* Halt the RLC F32 core, mask the GUI-idle interrupts and wait for the
 * RLC serdes masters to go idle before returning. */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4144
/* Pulse the RLC soft-reset bit, with settle delays on each edge. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4153
/* Re-enable the RLC F32 core; on dGPUs also re-enable the GUI-idle
 * interrupts (APUs do that later, after CP init). */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4164
4165 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4166 {
4167         const struct rlc_firmware_header_v2_0 *hdr;
4168         const __le32 *fw_data;
4169         unsigned i, fw_size;
4170
4171         if (!adev->gfx.rlc_fw)
4172                 return -EINVAL;
4173
4174         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4175         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4176
4177         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4178                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4179         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4180
4181         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4182         for (i = 0; i < fw_size; i++)
4183                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4184         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4185
4186         return 0;
4187 }
4188
/*
 * Full RLC restart sequence: stop the RLC, disable clock and power
 * gating, soft-reset it, re-initialize power gating, load the microcode
 * if the driver is doing direct (legacy) firmware loading, and start it.
 *
 * Returns 0 on success or the microcode-load error.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12 ||
	    adev->asic_type == CHIP_VEGAM) {
		/* also clear the two 3D CGCG/CGLS enable bits on these parts */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);


	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy rlc firmware loading */
		r = gfx_v8_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4228
4229 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4230 {
4231         int i;
4232         u32 tmp = RREG32(mmCP_ME_CNTL);
4233
4234         if (enable) {
4235                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4236                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4237                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4238         } else {
4239                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4240                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4241                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4242                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4243                         adev->gfx.gfx_ring[i].ready = false;
4244         }
4245         WREG32(mmCP_ME_CNTL, tmp);
4246         udelay(50);
4247 }
4248
4249 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4250 {
4251         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4252         const struct gfx_firmware_header_v1_0 *ce_hdr;
4253         const struct gfx_firmware_header_v1_0 *me_hdr;
4254         const __le32 *fw_data;
4255         unsigned i, fw_size;
4256
4257         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4258                 return -EINVAL;
4259
4260         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4261                 adev->gfx.pfp_fw->data;
4262         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4263                 adev->gfx.ce_fw->data;
4264         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4265                 adev->gfx.me_fw->data;
4266
4267         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4268         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4269         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4270
4271         gfx_v8_0_cp_gfx_enable(adev, false);
4272
4273         /* PFP */
4274         fw_data = (const __le32 *)
4275                 (adev->gfx.pfp_fw->data +
4276                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4277         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4278         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4279         for (i = 0; i < fw_size; i++)
4280                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4281         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4282
4283         /* CE */
4284         fw_data = (const __le32 *)
4285                 (adev->gfx.ce_fw->data +
4286                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4287         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4288         WREG32(mmCP_CE_UCODE_ADDR, 0);
4289         for (i = 0; i < fw_size; i++)
4290                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4291         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4292
4293         /* ME */
4294         fw_data = (const __le32 *)
4295                 (adev->gfx.me_fw->data +
4296                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4297         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4298         WREG32(mmCP_ME_RAM_WADDR, 0);
4299         for (i = 0; i < fw_size; i++)
4300                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4301         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4302
4303         return 0;
4304 }
4305
4306 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4307 {
4308         u32 count = 0;
4309         const struct cs_section_def *sect = NULL;
4310         const struct cs_extent_def *ext = NULL;
4311
4312         /* begin clear state */
4313         count += 2;
4314         /* context control state */
4315         count += 3;
4316
4317         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4318                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4319                         if (sect->id == SECT_CONTEXT)
4320                                 count += 2 + ext->reg_count;
4321                         else
4322                                 return 0;
4323                 }
4324         }
4325         /* pa_sc_raster_config/pa_sc_raster_config1 */
4326         count += 4;
4327         /* end clear state */
4328         count += 2;
4329         /* clear state */
4330         count += 2;
4331
4332         return count;
4333 }
4334
/*
 * Initialize the CP and emit the clear-state sequence on gfx ring 0:
 * preamble, context control, the vi_cs_data context registers, the
 * raster-config pair, end-of-preamble, CLEAR_STATE, and finally the CE
 * partition bases.  The packet order and count must match
 * gfx_v8_0_get_csb_size() (+4 for the SET_BASE packet).
 *
 * Returns 0 on success or the ring-allocation error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 dwords for the trailing SET_BASE packet */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every context-register extent from the golden cs data */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/*
 * Configure the CP gfx ring-buffer doorbell: program the doorbell
 * offset/enable for @ring and, on dGPUs, the doorbell aperture range.
 * Iceland has no gfx doorbells at all; APUs skip the range setup.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* APUs do not need the doorbell range programmed here */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4431
/*
 * Bring up the CP gfx ring buffer: program size/control, reset the
 * read/write pointers, set the rptr/wptr writeback addresses, program
 * the ring base and doorbell, then start the ring and run a ring test.
 *
 * Returns 0 on success or the ring-test error (ring marked not ready).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA is held while the pointers are reset and
	 * cleared again by the plain WREG32(tmp) below */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base address is stored in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4489
4490 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4491 {
4492         int i;
4493
4494         if (enable) {
4495                 WREG32(mmCP_MEC_CNTL, 0);
4496         } else {
4497                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4498                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4499                         adev->gfx.compute_ring[i].ready = false;
4500                 adev->gfx.kiq.ring.ready = false;
4501         }
4502         udelay(50);
4503 }
4504
/*
 * Legacy (direct) load of the MEC compute firmware: halt the MECs and
 * stream the MEC1 image (and the optional MEC2 image) into their ucode
 * RAMs through the ADDR/DATA register pairs.
 *
 * Returns 0 on success or -EINVAL if the MEC1 image is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4550
4551 /* KIQ functions */
4552 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4553 {
4554         uint32_t tmp;
4555         struct amdgpu_device *adev = ring->adev;
4556
4557         /* tell RLC which is KIQ queue */
4558         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4559         tmp &= 0xffffff00;
4560         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4561         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4562         tmp |= 0x80;
4563         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4564 }
4565
/*
 * Use the KIQ to map all compute queues (KCQs): build the queue mask
 * from the MEC queue bitmap, emit a SET_RESOURCES packet followed by
 * one MAP_QUEUES packet per compute ring, and poll a scratch register
 * the KIQ writes on completion.
 *
 * Returns 0 on success, or a negative error on scratch/ring allocation
 * failure or completion timeout.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	uint64_t queue_mask = 0;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	/* 8 dwords per MAP_QUEUES + 11 for SET_RESOURCES and the
	 * scratch-write completion marker */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	/* poll until the KIQ has written the completion marker */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
4651
/*
 * Deactivate the currently selected HQD: if it is active, issue a
 * dequeue request of type @req and wait up to usec_timeout for the
 * queue to go inactive, then clear the request and the queue pointers.
 * Caller is expected to have selected the target queue beforehand.
 *
 * Returns 0 on success or -ETIMEDOUT if the queue stayed active.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	/* clear the request and reset the queue read/write pointers */
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4672
4673 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4674 {
4675         struct amdgpu_device *adev = ring->adev;
4676         struct vi_mqd *mqd = ring->mqd_ptr;
4677         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4678         uint32_t tmp;
4679
4680         mqd->header = 0xC0310800;
4681         mqd->compute_pipelinestat_enable = 0x00000001;
4682         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4683         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4684         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4685         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4686         mqd->compute_misc_reserved = 0x00000003;
4687         mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4688                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4689         mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4690                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4691         eop_base_addr = ring->eop_gpu_addr >> 8;
4692         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4693         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4694
4695         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4696         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4697         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4698                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4699
4700         mqd->cp_hqd_eop_control = tmp;
4701
4702         /* enable doorbell? */
4703         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4704                             CP_HQD_PQ_DOORBELL_CONTROL,
4705                             DOORBELL_EN,
4706                             ring->use_doorbell ? 1 : 0);
4707
4708         mqd->cp_hqd_pq_doorbell_control = tmp;
4709
4710         /* set the pointer to the MQD */
4711         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4712         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4713
4714         /* set MQD vmid to 0 */
4715         tmp = RREG32(mmCP_MQD_CONTROL);
4716         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4717         mqd->cp_mqd_control = tmp;
4718
4719         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4720         hqd_gpu_addr = ring->gpu_addr >> 8;
4721         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4722         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4723
4724         /* set up the HQD, this is similar to CP_RB0_CNTL */
4725         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4726         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4727                             (order_base_2(ring->ring_size / 4) - 1));
4728         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4729                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4730 #ifdef __BIG_ENDIAN
4731         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4732 #endif
4733         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4734         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4735         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4736         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4737         mqd->cp_hqd_pq_control = tmp;
4738
4739         /* set the wb address whether it's enabled or not */
4740         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4741         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4742         mqd->cp_hqd_pq_rptr_report_addr_hi =
4743                 upper_32_bits(wb_gpu_addr) & 0xffff;
4744
4745         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4746         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4747         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4748         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4749
4750         tmp = 0;
4751         /* enable the doorbell if requested */
4752         if (ring->use_doorbell) {
4753                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4754                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4755                                 DOORBELL_OFFSET, ring->doorbell_index);
4756
4757                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4758                                          DOORBELL_EN, 1);
4759                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4760                                          DOORBELL_SOURCE, 0);
4761                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4762                                          DOORBELL_HIT, 0);
4763         }
4764
4765         mqd->cp_hqd_pq_doorbell_control = tmp;
4766
4767         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4768         ring->wptr = 0;
4769         mqd->cp_hqd_pq_wptr = ring->wptr;
4770         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4771
4772         /* set the vmid for the queue */
4773         mqd->cp_hqd_vmid = 0;
4774
4775         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4776         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4777         mqd->cp_hqd_persistent_state = tmp;
4778
4779         /* set MTYPE */
4780         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4781         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4782         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4783         mqd->cp_hqd_ib_control = tmp;
4784
4785         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4786         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4787         mqd->cp_hqd_iq_timer = tmp;
4788
4789         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4790         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4791         mqd->cp_hqd_ctx_save_control = tmp;
4792
4793         /* defaults */
4794         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4795         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4796         mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4797         mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4798         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4799         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4800         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4801         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4802         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4803         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4804         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4805         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4806         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4807         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4808         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4809
4810         /* activate the queue */
4811         mqd->cp_hqd_active = 1;
4812
4813         return 0;
4814 }
4815
/*
 * gfx_v8_0_mqd_commit - program an initialized MQD image into the HQD registers
 * @adev: amdgpu device pointer
 * @mqd:  initialized memory queue descriptor (see gfx_v8_0_mqd_init())
 *
 * Copies the MQD fields into the per-queue HQD register file; the MQD
 * struct layout mirrors the register offsets, hence the
 * mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR] indexing.  The caller must hold
 * srbm_mutex and have selected the target me/pipe/queue via
 * vi_srbm_select() (see gfx_v8_0_kiq_init_queue()).  The final loop ends
 * at CP_HQD_ACTIVE, so the queue is activated only after everything else
 * has been programmed.  Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	/* resume after the registers skipped for the Tonga errata */
	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4852
/*
 * gfx_v8_0_kiq_init_queue - initialize and commit the KIQ ring's MQD
 * @ring: the KIQ ring (ring->mqd_ptr must be mapped by the caller)
 *
 * Fresh init: zero the MQD allocation, fill it with gfx_v8_0_mqd_init(),
 * commit it to the HQD registers, and keep a backup copy.  After a GPU
 * reset: restore the backup, clear the ring buffer, and re-commit.  Both
 * paths program the HQD under srbm_mutex with the KIQ's me/pipe/queue
 * selected.  Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* the KIQ backup slot sits one past the compute-ring backups */
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		/* all bits set in the dynamic CU/RB masks by default */
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* save a clean copy for restoration after a GPU reset */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4891
/*
 * gfx_v8_0_kcq_init_queue - initialize a kernel compute queue's MQD
 * @ring: a compute ring (ring->mqd_ptr must be mapped by the caller)
 *
 * Fresh init (neither GPU reset nor resume): zero the MQD, fill it under
 * srbm_mutex, and keep a backup copy.  GPU reset: restore the backup and
 * clear the ring buffer.  Resume from suspend: only clear the ring.
 * Unlike the KIQ path, the MQD is not committed to the HQD registers
 * here -- the KCQs are mapped afterwards via gfx_v8_0_kiq_kcq_enable()
 * (see gfx_v8_0_kiq_resume()).  Always returns 0.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* backup slot index == position within the compute_ring array */
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		/* all bits set in the dynamic CU/RB masks by default */
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		/* resume path: keep the existing MQD, just clear the ring */
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4922
4923 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4924 {
4925         if (adev->asic_type > CHIP_TONGA) {
4926                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4927                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4928         }
4929         /* enable doorbells */
4930         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4931 }
4932
/*
 * gfx_v8_0_kiq_resume - bring up the KIQ and all kernel compute queues
 * @adev: amdgpu device pointer
 *
 * Enables the compute CP, initializes the KIQ MQD, initializes every
 * KCQ MQD (each MQD BO is reserved/mapped only for the duration of its
 * init), programs the MEC doorbell range, asks the KIQ to map the KCQs,
 * and finally ring-tests the KIQ and every KCQ.
 *
 * Returns 0 on success or a negative error code.  Note that in the KCQ
 * test loop a later passing test overwrites r, so only a failure on the
 * last ring propagates to the caller; failing rings are still marked
 * not ready.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	/* map the KIQ MQD BO only while initializing it */
	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	/* same reserve/map/init/unmap sequence for each compute queue */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	/* have the KIQ map the kernel compute queues */
	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
5000
5001 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5002 {
5003         int r;
5004
5005         if (!(adev->flags & AMD_IS_APU))
5006                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5007
5008         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
5009                         /* legacy firmware loading */
5010                 r = gfx_v8_0_cp_gfx_load_microcode(adev);
5011                 if (r)
5012                         return r;
5013
5014                 r = gfx_v8_0_cp_compute_load_microcode(adev);
5015                 if (r)
5016                         return r;
5017         }
5018
5019         r = gfx_v8_0_cp_gfx_resume(adev);
5020         if (r)
5021                 return r;
5022
5023         r = gfx_v8_0_kiq_resume(adev);
5024         if (r)
5025                 return r;
5026
5027         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5028
5029         return 0;
5030 }
5031
/* Enable or disable both the gfx and compute command processors. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5037
/*
 * gfx_v8_0_hw_init - IP-block hw_init callback for gfx v8
 * @handle: amdgpu_device pointer cast to void *
 *
 * Programs the golden registers and static GPU configuration, brings up
 * the RLC, then the command processors.  Returns 0 or a negative error
 * code from the resume steps.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* the RLC comes up first, then the CP */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
5054
/*
 * gfx_v8_0_kcq_disable - unmap one kernel compute queue via the KIQ
 * @kiq_ring: the KIQ ring used to submit the request
 * @ring:     the compute ring to unmap (selected by doorbell offset)
 *
 * Emits an UNMAP_QUEUES packet (RESET_QUEUES action) followed by a
 * SET_UCONFIG_REG write of 0xDEADBEEF to a scratch register; the CPU
 * then polls that scratch register to confirm the KIQ consumed both
 * packets.  Returns 0 on success, a negative error code if the scratch
 * register or ring space could not be obtained, or -EINVAL on timeout.
 */
static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t scratch, tmp = 0;
	int r, i;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* sentinel; replaced by 0xDEADBEEF once the KIQ catches up */
	WREG32(scratch, 0xCAFEDEAD);

	/* reserve space for the unmap and completion packets */
	r = amdgpu_ring_alloc(kiq_ring, 10);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}

	/* unmap queues */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	/* busy-wait (1us steps) for the completion value to appear */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
5105
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini callback for gfx v8
 * @handle: amdgpu_device pointer cast to void *
 *
 * Releases the privileged register/instruction interrupts, unmaps every
 * kernel compute queue, and (bare-metal only) halts the CP and RLC and
 * sets the GFX powergating state to UNGATE.  Under SR-IOV the function
 * returns after the KCQ unmap since the host owns the rest of the
 * teardown.  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	/* NOTE(review): kcq_disable errors are ignored here; it already
	 * logs failures internally via DRM_ERROR. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);

	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	amdgpu_device_ip_set_powergating_state(adev,
					       AMD_IP_BLOCK_TYPE_GFX,
					       AMD_PG_STATE_UNGATE);

	return 0;
}
5131
5132 static int gfx_v8_0_suspend(void *handle)
5133 {
5134         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5135         adev->gfx.in_suspend = true;
5136         return gfx_v8_0_hw_fini(adev);
5137 }
5138
5139 static int gfx_v8_0_resume(void *handle)
5140 {
5141         int r;
5142         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5143
5144         r = gfx_v8_0_hw_init(adev);
5145         adev->gfx.in_suspend = false;
5146         return r;
5147 }
5148
5149 static bool gfx_v8_0_is_idle(void *handle)
5150 {
5151         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5152
5153         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5154                 return false;
5155         else
5156                 return true;
5157 }
5158
5159 static int gfx_v8_0_wait_for_idle(void *handle)
5160 {
5161         unsigned i;
5162         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5163
5164         for (i = 0; i < adev->usec_timeout; i++) {
5165                 if (gfx_v8_0_is_idle(handle))
5166                         return 0;
5167
5168                 udelay(1);
5169         }
5170         return -ETIMEDOUT;
5171 }
5172
/*
 * gfx_v8_0_check_soft_reset - decide whether a gfx soft reset is needed
 * @handle: amdgpu_device pointer cast to void *
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS and accumulates the
 * GRBM/SRBM soft-reset bit masks for every engine that reports busy.
 * The masks are cached in adev->gfx.{grbm,srbm}_soft_reset for the
 * pre/soft/post reset stages to consume.  Returns true when any reset
 * bit was set, false (and zeroed caches) otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	/* any busy pipeline stage implies a full CP+GFX (and GRBM) reset */
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* a busy CP fetcher/compute/gfx micro-engine resets all three */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* cache the masks for the pre/soft/post reset handlers */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5234
/*
 * gfx_v8_0_pre_soft_reset - quiesce the gfx block before a soft reset
 * @handle: amdgpu_device pointer cast to void *
 *
 * Consumes the reset masks cached by gfx_v8_0_check_soft_reset().  Stops
 * the RLC, disables gfx CP parsing/prefetching when a CP/GFX reset is
 * pending, and, when any CP engine reset is pending, deactivates every
 * compute HQD (under srbm_mutex with the queue selected) and disables
 * the compute CP.  Returns 0, also when no reset was flagged.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	/* nothing was flagged by gfx_v8_0_check_soft_reset() */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* deactivate each compute queue's HQD before halting the MEC */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

       return 0;
}
5276
/*
 * gfx_v8_0_soft_reset - pulse the soft-reset bits computed earlier
 * @handle: amdgpu_device pointer cast to void *
 *
 * Applies the GRBM/SRBM soft-reset masks cached by
 * gfx_v8_0_check_soft_reset().  GMCON GFX stall/clear is asserted around
 * the whole sequence, each reset is asserted for ~50us and then
 * released, and the register is read back after each write.  Returns 0,
 * also when no reset was flagged.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* nothing was flagged by gfx_v8_0_check_soft_reset() */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall and clear GFX traffic in GMCON for the duration */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* assert the GRBM reset bits */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);	/* read back after write */

		udelay(50);

		/* release the reset bits */
		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* assert the SRBM reset bits */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);	/* read back after write */

		udelay(50);

		/* release the reset bits */
		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* un-stall GFX traffic again */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5338
/*
 * gfx_v8_0_post_soft_reset - restart the engines that were soft-reset
 * @handle: amdgpu_device pointer cast to void *
 *
 * Mirror of gfx_v8_0_pre_soft_reset(): resumes the gfx CP when a CP/GFX
 * reset was performed, and when any CP engine was reset, deactivates the
 * compute HQDs again and re-runs the full KIQ/KCQ bring-up.  Finally
 * restarts the RLC.  Returns 0, also when no reset was flagged.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	/* nothing was flagged by gfx_v8_0_check_soft_reset() */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* make sure every compute HQD is inactive before resuming */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5376
5377 /**
5378  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5379  *
5380  * @adev: amdgpu_device pointer
5381  *
5382  * Fetches a GPU clock counter snapshot.
5383  * Returns the 64 bit clock counter snapshot.
5384  */
5385 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5386 {
5387         uint64_t clock;
5388
5389         mutex_lock(&adev->gfx.gpu_clock_mutex);
5390         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5391         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5392                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5393         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5394         return clock;
5395 }
5396
/*
 * gfx_v8_0_ring_emit_gds_switch - emit the per-VMID GDS configuration
 * @ring: ring to emit on
 * @vmid: VM id whose GDS/GWS/OA registers are programmed
 * @gds_base/@gds_size: GDS memory allocation (bytes, shifted to hw units)
 * @gws_base/@gws_size: GWS allocation (shifted to hw units)
 * @oa_base/@oa_size:   OA allocation (shifted to hw units)
 *
 * Emits four WRITE_DATA packets programming the GDS memory base, GDS
 * memory size, GWS allocation and OA mask registers for @vmid (register
 * offsets come from the amdgpu_gds_reg_offset table).
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* convert byte quantities into hardware allocation units */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5444
5445 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5446 {
5447         WREG32(mmSQ_IND_INDEX,
5448                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5449                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5450                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5451                 (SQ_IND_INDEX__FORCE_READ_MASK));
5452         return RREG32(mmSQ_IND_DATA);
5453 }
5454
5455 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5456                            uint32_t wave, uint32_t thread,
5457                            uint32_t regno, uint32_t num, uint32_t *out)
5458 {
5459         WREG32(mmSQ_IND_INDEX,
5460                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5461                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5462                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5463                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5464                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5465                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5466         while (num--)
5467                 *(out++) = RREG32(mmSQ_IND_DATA);
5468 }
5469
5470 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5471 {
5472         /* type 0 wave data */
5473         dst[(*no_fields)++] = 0;
5474         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5475         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5476         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5477         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5478         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5479         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5480         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5481         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5482         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5483         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5484         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5485         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5486         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5487         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5488         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5489         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5490         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5491         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5492 }
5493
5494 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5495                                      uint32_t wave, uint32_t start,
5496                                      uint32_t size, uint32_t *dst)
5497 {
5498         wave_read_regs(
5499                 adev, simd, wave, 0,
5500                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5501 }
5502
5503
/* GFX IP callbacks; installed on adev->gfx.funcs in gfx_v8_0_early_init() */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5511
/*
 * Early init: record the ring counts and install the gfx/ring/irq/
 * gds/rlc function tables for this IP version.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5526
/*
 * Late init: enable the privileged register/instruction fault
 * interrupts, run the EDC GPR workaround, then request GFX power
 * gating.  Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_device_ip_set_powergating_state(adev,
					       AMD_IP_BLOCK_TYPE_GFX,
					       AMD_PG_STATE_GATE);

	return 0;
}
5551
5552 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5553                                                        bool enable)
5554 {
5555         if ((adev->asic_type == CHIP_POLARIS11) ||
5556             (adev->asic_type == CHIP_POLARIS12) ||
5557             (adev->asic_type == CHIP_VEGAM))
5558                 /* Send msg to SMU via Powerplay */
5559                 amdgpu_device_ip_set_powergating_state(adev,
5560                                                        AMD_IP_BLOCK_TYPE_SMC,
5561                                                        enable ?
5562                                                        AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5563
5564         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5565 }
5566
5567 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5568                                                         bool enable)
5569 {
5570         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5571 }
5572
5573 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5574                 bool enable)
5575 {
5576         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5577 }
5578
5579 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5580                                           bool enable)
5581 {
5582         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5583 }
5584
5585 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5586                                                 bool enable)
5587 {
5588         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5589
5590         /* Read any GFX register to wake up GFX. */
5591         if (!enable)
5592                 RREG32(mmDB_RENDER_CONTROL);
5593 }
5594
5595 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5596                                           bool enable)
5597 {
5598         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5599                 cz_enable_gfx_cg_power_gating(adev, true);
5600                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5601                         cz_enable_gfx_pipeline_power_gating(adev, true);
5602         } else {
5603                 cz_enable_gfx_cg_power_gating(adev, false);
5604                 cz_enable_gfx_pipeline_power_gating(adev, false);
5605         }
5606 }
5607
/*
 * Route a power-gating state change to the per-ASIC gating helpers.
 * No-op under SR-IOV and on ASICs not listed below.  Always returns 0.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down follows RLC/SMU handshake support */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		/* static (SMG) then dynamic (DMG) per-CU gating */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		/* static (SMG), dynamic (DMG), then quick gating */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}

	return 0;
}
5669
/*
 * Report the clock-gating features currently active in hardware by
 * sampling the CG control registers and OR-ing the corresponding
 * AMD_CG_SUPPORT_* bits into @flags.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* NOTE(review): under SR-IOV *flags is cleared but the register
	 * reads below still run — confirm this is intentional.
	 */
	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS (same register as CGCG above) */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS (RLC light sleep implies MGLS) */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS (CP light sleep implies MGLS) */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5711
/*
 * Broadcast a BPM command to the RLC SERDES: select every SE/SH, arm
 * both master masks, then program RLC_SERDES_WR_CTRL with the command
 * and target register address.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to all SEs/SHs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		/* NOTE(review): unlike the generic path, Stoney does not
		 * clear BPM_DATA/REG_ADDR before OR-ing in the new values
		 * below — confirm this is intentional.
		 */
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5752
/* RLC safe-mode request messages and RLC_GPR_REG2 field encodings.
 * NOTE(review): not referenced by the iceland_*_rlc_safe_mode helpers
 * below (those use mmRLC_SAFE_MODE) — possibly consumed elsewhere.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5759
/*
 * Request RLC safe mode: write CMD with message 1 into RLC_SAFE_MODE,
 * then poll the GFX clock/power status bits and wait for the RLC to
 * acknowledge by clearing CMD.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	/* nothing to do when the RLC F32 core is not running */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for both GFX clock and power status to assert */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack by clearing the CMD bit */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5793
/*
 * Leave RLC safe mode: write CMD with message 0 into RLC_SAFE_MODE
 * (only if we previously entered), then wait for the RLC to clear CMD.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	/* nothing to do when the RLC F32 core is not running */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to acknowledge by clearing CMD */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5818
/* RLC safe-mode enter/exit callbacks */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5823
/*
 * Enable/disable medium-grain clock gating (MGCG) together with the
 * related memory light-sleep (RLC/CP MGLS) and CGTS features.  The
 * whole register/SERDES sequence runs inside RLC safe mode; the
 * numbered comments mark the programming steps.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE (APUs keep GRBM override set) */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5927
/*
 * Enable/disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS).  The override/SERDES programming runs inside
 * RLC safe mode; GUI idle interrupts are re-enabled at the end of
 * both paths.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear cgcg override in RLC_CGTT_MGCG_OVERRIDE */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6020 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6021                                             bool enable)
6022 {
6023         if (enable) {
6024                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6025                  * ===  MGCG + MGLS + TS(CG/LS) ===
6026                  */
6027                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6028                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6029         } else {
6030                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6031                  * ===  CGCG + CGLS ===
6032                  */
6033                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6034                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6035         }
6036         return 0;
6037 }
6038
6039 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6040                                           enum amd_clockgating_state state)
6041 {
6042         uint32_t msg_id, pp_state = 0;
6043         uint32_t pp_support_state = 0;
6044
6045         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6046                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6047                         pp_support_state = PP_STATE_SUPPORT_LS;
6048                         pp_state = PP_STATE_LS;
6049                 }
6050                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6051                         pp_support_state |= PP_STATE_SUPPORT_CG;
6052                         pp_state |= PP_STATE_CG;
6053                 }
6054                 if (state == AMD_CG_STATE_UNGATE)
6055                         pp_state = 0;
6056
6057                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6058                                 PP_BLOCK_GFX_CG,
6059                                 pp_support_state,
6060                                 pp_state);
6061                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6062                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6063         }
6064
6065         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6066                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6067                         pp_support_state = PP_STATE_SUPPORT_LS;
6068                         pp_state = PP_STATE_LS;
6069                 }
6070
6071                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6072                         pp_support_state |= PP_STATE_SUPPORT_CG;
6073                         pp_state |= PP_STATE_CG;
6074                 }
6075
6076                 if (state == AMD_CG_STATE_UNGATE)
6077                         pp_state = 0;
6078
6079                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6080                                 PP_BLOCK_GFX_MG,
6081                                 pp_support_state,
6082                                 pp_state);
6083                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6084                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6085         }
6086
6087         return 0;
6088 }
6089
6090 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6091                                           enum amd_clockgating_state state)
6092 {
6093
6094         uint32_t msg_id, pp_state = 0;
6095         uint32_t pp_support_state = 0;
6096
6097         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6098                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6099                         pp_support_state = PP_STATE_SUPPORT_LS;
6100                         pp_state = PP_STATE_LS;
6101                 }
6102                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6103                         pp_support_state |= PP_STATE_SUPPORT_CG;
6104                         pp_state |= PP_STATE_CG;
6105                 }
6106                 if (state == AMD_CG_STATE_UNGATE)
6107                         pp_state = 0;
6108
6109                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6110                                 PP_BLOCK_GFX_CG,
6111                                 pp_support_state,
6112                                 pp_state);
6113                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6114                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6115         }
6116
6117         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6118                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6119                         pp_support_state = PP_STATE_SUPPORT_LS;
6120                         pp_state = PP_STATE_LS;
6121                 }
6122                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6123                         pp_support_state |= PP_STATE_SUPPORT_CG;
6124                         pp_state |= PP_STATE_CG;
6125                 }
6126                 if (state == AMD_CG_STATE_UNGATE)
6127                         pp_state = 0;
6128
6129                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6130                                 PP_BLOCK_GFX_3D,
6131                                 pp_support_state,
6132                                 pp_state);
6133                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6134                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6135         }
6136
6137         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6138                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6139                         pp_support_state = PP_STATE_SUPPORT_LS;
6140                         pp_state = PP_STATE_LS;
6141                 }
6142
6143                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6144                         pp_support_state |= PP_STATE_SUPPORT_CG;
6145                         pp_state |= PP_STATE_CG;
6146                 }
6147
6148                 if (state == AMD_CG_STATE_UNGATE)
6149                         pp_state = 0;
6150
6151                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6152                                 PP_BLOCK_GFX_MG,
6153                                 pp_support_state,
6154                                 pp_state);
6155                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6156                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6157         }
6158
6159         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6160                 pp_support_state = PP_STATE_SUPPORT_LS;
6161
6162                 if (state == AMD_CG_STATE_UNGATE)
6163                         pp_state = 0;
6164                 else
6165                         pp_state = PP_STATE_LS;
6166
6167                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6168                                 PP_BLOCK_GFX_RLC,
6169                                 pp_support_state,
6170                                 pp_state);
6171                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6172                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6173         }
6174
6175         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6176                 pp_support_state = PP_STATE_SUPPORT_LS;
6177
6178                 if (state == AMD_CG_STATE_UNGATE)
6179                         pp_state = 0;
6180                 else
6181                         pp_state = PP_STATE_LS;
6182                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6183                         PP_BLOCK_GFX_CP,
6184                         pp_support_state,
6185                         pp_state);
6186                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6187                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6188         }
6189
6190         return 0;
6191 }
6192
6193 static int gfx_v8_0_set_clockgating_state(void *handle,
6194                                           enum amd_clockgating_state state)
6195 {
6196         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6197
6198         if (amdgpu_sriov_vf(adev))
6199                 return 0;
6200
6201         switch (adev->asic_type) {
6202         case CHIP_FIJI:
6203         case CHIP_CARRIZO:
6204         case CHIP_STONEY:
6205                 gfx_v8_0_update_gfx_clock_gating(adev,
6206                                                  state == AMD_CG_STATE_GATE);
6207                 break;
6208         case CHIP_TONGA:
6209                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6210                 break;
6211         case CHIP_POLARIS10:
6212         case CHIP_POLARIS11:
6213         case CHIP_POLARIS12:
6214         case CHIP_VEGAM:
6215                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6216                 break;
6217         default:
6218                 break;
6219         }
6220         return 0;
6221 }
6222
6223 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6224 {
6225         return ring->adev->wb.wb[ring->rptr_offs];
6226 }
6227
6228 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6229 {
6230         struct amdgpu_device *adev = ring->adev;
6231
6232         if (ring->use_doorbell)
6233                 /* XXX check if swapping is necessary on BE */
6234                 return ring->adev->wb.wb[ring->wptr_offs];
6235         else
6236                 return RREG32(mmCP_RB0_WPTR);
6237 }
6238
6239 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6240 {
6241         struct amdgpu_device *adev = ring->adev;
6242
6243         if (ring->use_doorbell) {
6244                 /* XXX check if swapping is necessary on BE */
6245                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6246                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6247         } else {
6248                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6249                 (void)RREG32(mmCP_RB0_WPTR);
6250         }
6251 }
6252
/*
 * Emit a PM4 WAIT_REG_MEM packet that requests an HDP flush (write to
 * GPU_HDP_FLUSH_REQ) and then polls GPU_HDP_FLUSH_DONE until the bit
 * for this ring's CP engine is set.  Which done-bit to poll depends on
 * the ME/pipe the ring runs on.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		/* compute/KIQ: base done-bit per MEC, shifted by pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* only ME 1/2 are handled here; silently ignore others */
			return;
		}
		/* engine select 0 (not the pfp engine used below) */
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	/* WAIT_REG_MEM: write ref to REQ reg, poll DONE reg until equal */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);	/* reference value */
	amdgpu_ring_write(ring, ref_and_mask);	/* compare mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6285
/*
 * Emit two EVENT_WRITE packets: a VS partial flush followed by a VGT
 * flush.  Called before context register loads (see emit_cntxcntl).
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6296
/*
 * Emit an INDIRECT_BUFFER packet on the gfx ring to launch @ib.
 * CE IBs use INDIRECT_BUFFER_CONST; DE IBs use INDIRECT_BUFFER.
 * The control dword packs the IB length and the VMID (bits 24+).
 * Under SR-IOV, preemptible IBs additionally set the PRE_ENB bit and
 * DE IBs emit the de_meta data first.  @ctx_switch is unused here.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vmid, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB size in dwords plus the target VMID */
	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		/* DE metadata must precede a preemptible DE IB */
		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* swap bits for big-endian hosts */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));	/* dword-aligned low addr */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6326
/*
 * Emit an INDIRECT_BUFFER packet on a compute ring to launch @ib.
 * The control dword carries VALID, the IB length and the VMID.
 * @ctx_switch is unused on compute.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vmid, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |	/* swap bits for big-endian hosts */
#endif
				(ib->gpu_addr & 0xFFFFFFFC));	/* dword-aligned low addr */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6342
/*
 * Emit a fence on the gfx ring: an EVENT_WRITE_EOP packet that flushes
 * the TC/TCL1 caches and then writes @seq to @addr, optionally raising
 * an interrupt.  @flags selects 64-bit vs 32-bit seq writes and whether
 * the interrupt is requested.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);	/* dword-aligned low addr */
	/* DATA_SEL: 2 = 64-bit seq, 1 = 32-bit; INT_SEL: 2 = irq on write */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6363
6364 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6365 {
6366         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6367         uint32_t seq = ring->fence_drv.sync_seq;
6368         uint64_t addr = ring->fence_drv.gpu_addr;
6369
6370         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6371         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6372                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6373                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6374         amdgpu_ring_write(ring, addr & 0xfffffffc);
6375         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6376         amdgpu_ring_write(ring, seq);
6377         amdgpu_ring_write(ring, 0xffffffff);
6378         amdgpu_ring_write(ring, 4); /* poll interval */
6379 }
6380
/*
 * Emit a VM TLB flush for @vmid with page directory @pd_addr, then wait
 * for the invalidation to complete by polling VM_INVALIDATE_REQUEST for
 * zero.  On gfx rings, finish with a PFP_SYNC_ME so the PFP does not
 * prefetch through stale translations.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6406
6407 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6408 {
6409         return ring->adev->wb.wb[ring->wptr_offs];
6410 }
6411
6412 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6413 {
6414         struct amdgpu_device *adev = ring->adev;
6415
6416         /* XXX check if swapping is necessary on BE */
6417         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6418         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6419 }
6420
6421 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6422                                            bool acquire)
6423 {
6424         struct amdgpu_device *adev = ring->adev;
6425         int pipe_num, tmp, reg;
6426         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6427
6428         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6429
6430         /* first me only has 2 entries, GFX and HP3D */
6431         if (ring->me > 0)
6432                 pipe_num -= 2;
6433
6434         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6435         tmp = RREG32(reg);
6436         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6437         WREG32(reg, tmp);
6438 }
6439
6440 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6441                                             struct amdgpu_ring *ring,
6442                                             bool acquire)
6443 {
6444         int i, pipe;
6445         bool reserve;
6446         struct amdgpu_ring *iring;
6447
6448         mutex_lock(&adev->gfx.pipe_reserve_mutex);
6449         pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6450         if (acquire)
6451                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6452         else
6453                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6454
6455         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6456                 /* Clear all reservations - everyone reacquires all resources */
6457                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6458                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6459                                                        true);
6460
6461                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6462                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6463                                                        true);
6464         } else {
6465                 /* Lower all pipes without a current reservation */
6466                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6467                         iring = &adev->gfx.gfx_ring[i];
6468                         pipe = amdgpu_gfx_queue_to_bit(adev,
6469                                                        iring->me,
6470                                                        iring->pipe,
6471                                                        0);
6472                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6473                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6474                 }
6475
6476                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6477                         iring = &adev->gfx.compute_ring[i];
6478                         pipe = amdgpu_gfx_queue_to_bit(adev,
6479                                                        iring->me,
6480                                                        iring->pipe,
6481                                                        0);
6482                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6483                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6484                 }
6485         }
6486
6487         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6488 }
6489
6490 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6491                                       struct amdgpu_ring *ring,
6492                                       bool acquire)
6493 {
6494         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6495         uint32_t queue_priority = acquire ? 0xf : 0x0;
6496
6497         mutex_lock(&adev->srbm_mutex);
6498         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6499
6500         WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6501         WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6502
6503         vi_srbm_select(adev, 0, 0, 0, 0);
6504         mutex_unlock(&adev->srbm_mutex);
6505 }
6506 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6507                                                enum drm_sched_priority priority)
6508 {
6509         struct amdgpu_device *adev = ring->adev;
6510         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6511
6512         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6513                 return;
6514
6515         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6516         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6517 }
6518
/*
 * Emit a fence on a compute ring: a RELEASE_MEM packet that flushes
 * the TC/TCL1 caches and writes @seq to @addr, optionally raising an
 * interrupt.  Same contract as the gfx variant but with the compute
 * packet layout (data-select dword precedes the address).
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL: 2 = 64-bit seq, 1 = 32-bit; INT_SEL: 2 = irq on write */
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);	/* dword-aligned low addr */
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6539
/*
 * Emit a fence on the KIQ ring via WRITE_DATA: write the 32-bit @seq to
 * @addr, then, if requested, poke CPC_INT_STATUS to raise the interrupt.
 * 64-bit fences are not supported on KIQ (only 32 bits of writeback are
 * allocated per seq).
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6564
/* Emit a SWITCH_BUFFER packet (payload 0) on the ring. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6570
/*
 * Emit a CONTEXT_CONTROL packet.  The dw2 bits select which state
 * groups the CP (re)loads; on a context switch the VGT is flushed
 * first.  Under SR-IOV, CE metadata is emitted before the packet.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6603
/*
 * Emit a COND_EXEC packet whose skip-count dword is filled in later by
 * gfx_v8_0_ring_emit_patch_cond_exec().  Returns the ring offset of the
 * placeholder dword (0x55aa55aa) so the caller can patch it.  The offset
 * must be captured before the placeholder is written.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6616
/*
 * Patch the COND_EXEC placeholder at @offset (returned by
 * gfx_v8_0_ring_emit_init_cond_exec()) with the number of dwords to
 * skip, i.e. the distance from the placeholder to the current wptr,
 * accounting for ring-buffer wraparound.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped past the end of the ring since the placeholder */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6630
/*
 * Emit a COPY_DATA packet that copies register @reg into the writeback
 * area at virt.reg_val_offs, with write confirmation.  Used for
 * register reads performed through the ring (e.g. under SR-IOV).
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6646
6647 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6648                                   uint32_t val)
6649 {
6650         uint32_t cmd;
6651
6652         switch (ring->funcs->type) {
6653         case AMDGPU_RING_TYPE_GFX:
6654                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6655                 break;
6656         case AMDGPU_RING_TYPE_KIQ:
6657                 cmd = 1 << 16; /* no inc addr */
6658                 break;
6659         default:
6660                 cmd = WR_CONFIRM;
6661                 break;
6662         }
6663
6664         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6665         amdgpu_ring_write(ring, cmd);
6666         amdgpu_ring_write(ring, reg);
6667         amdgpu_ring_write(ring, 0);
6668         amdgpu_ring_write(ring, val);
6669 }
6670
6671 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6672                                                  enum amdgpu_interrupt_state state)
6673 {
6674         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6675                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6676 }
6677
6678 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6679                                                      int me, int pipe,
6680                                                      enum amdgpu_interrupt_state state)
6681 {
6682         u32 mec_int_cntl, mec_int_cntl_reg;
6683
6684         /*
6685          * amdgpu controls only the first MEC. That's why this function only
6686          * handles the setting of interrupts for this specific MEC. All other
6687          * pipes' interrupts are set by amdkfd.
6688          */
6689
6690         if (me == 1) {
6691                 switch (pipe) {
6692                 case 0:
6693                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6694                         break;
6695                 case 1:
6696                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6697                         break;
6698                 case 2:
6699                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6700                         break;
6701                 case 3:
6702                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6703                         break;
6704                 default:
6705                         DRM_DEBUG("invalid pipe %d\n", pipe);
6706                         return;
6707                 }
6708         } else {
6709                 DRM_DEBUG("invalid me %d\n", me);
6710                 return;
6711         }
6712
6713         switch (state) {
6714         case AMDGPU_IRQ_STATE_DISABLE:
6715                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6716                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6717                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6718                 break;
6719         case AMDGPU_IRQ_STATE_ENABLE:
6720                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6721                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6722                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6723                 break;
6724         default:
6725                 break;
6726         }
6727 }
6728
6729 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6730                                              struct amdgpu_irq_src *source,
6731                                              unsigned type,
6732                                              enum amdgpu_interrupt_state state)
6733 {
6734         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6735                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6736
6737         return 0;
6738 }
6739
6740 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6741                                               struct amdgpu_irq_src *source,
6742                                               unsigned type,
6743                                               enum amdgpu_interrupt_state state)
6744 {
6745         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6746                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6747
6748         return 0;
6749 }
6750
/*
 * IRQ-source callback: route an EOP interrupt enable/disable request to
 * the gfx handler or the matching compute MEC/pipe handler.  Unknown
 * types are silently ignored.  (Note: only MEC1 requests take effect;
 * see gfx_v8_0_set_compute_eop_interrupt_state.)
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6789
/*
 * EOP interrupt handler: decode me/pipe/queue from the IV entry's
 * ring_id and process the fence of the matching ring.  ME 0 is the
 * single gfx ring; ME 1/2 are the compute rings.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id bit layout: [6:4] queue, [3:2] me, [1:0] pipe */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead
			 * of per queue, so match on all three fields here.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6821
/*
 * Handler for privileged-register-access faults from the CP: log the
 * violation and schedule a GPU reset.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6830
/*
 * Handler for privileged-instruction faults from the CP: log the
 * violation and schedule a GPU reset.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6839
/*
 * Enable or disable the GENERIC2 interrupt used by the KIQ ring.
 *
 * Two levels of gating are programmed: the global CPC enable in
 * CPC_INT_CNTL, and the per-pipe enable in the CP_ME{1,2}_PIPEn_INT_CNTL
 * register matching the ME/pipe the KIQ ring lives on (WREG32_FIELD_OFFSET
 * indexes off the PIPE0 register by ring->pipe).  Only the
 * AMDGPU_CP_KIQ_IRQ_DRIVER0 type is supported; anything else is a driver
 * bug, hence BUG().  Always returns 0.
 */
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		/* global CPC-level enable */
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		/* per-pipe enable on whichever ME hosts the KIQ ring */
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}
6868
6869 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6870                             struct amdgpu_irq_src *source,
6871                             struct amdgpu_iv_entry *entry)
6872 {
6873         u8 me_id, pipe_id, queue_id;
6874         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6875
6876         me_id = (entry->ring_id & 0x0c) >> 2;
6877         pipe_id = (entry->ring_id & 0x03) >> 0;
6878         queue_id = (entry->ring_id & 0x70) >> 4;
6879         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6880                    me_id, pipe_id, queue_id);
6881
6882         amdgpu_fence_process(ring);
6883         return 0;
6884 }
6885
/*
 * IP-block callback table wiring the GFX v8 implementation into the
 * common amdgpu IP lifecycle (init/fini, suspend/resume, soft reset and
 * clock-/power-gating control).  Referenced by both the 8.0 and 8.1
 * ip_block descriptors at the bottom of this file.
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6906
/*
 * Ring callback table for the GFX (graphics) ring.  emit_frame_size is
 * the worst-case dword count of the per-submission framing emitted
 * around the IBs (each item's contribution is annotated inline).
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /*	CE_META */
		31 + /*	DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6950
/*
 * Ring callback table for the compute (MEC) rings.  Shares the IB/fence
 * emit helpers with the gfx table where the packet format is identical,
 * but uses the compute-specific rptr/wptr accessors and fence emitter,
 * and supports per-ring priority via set_priority.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6980
/*
 * Ring callback table for the KIQ (kernel interface queue) ring.  Uses
 * the compute rptr/wptr accessors but its own fence emitter, and adds
 * emit_rreg so register reads can be performed through the ring (useful
 * under SR-IOV virtualization).
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
7006
7007 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7008 {
7009         int i;
7010
7011         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7012
7013         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7014                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7015
7016         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7017                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7018 }
7019
/* End-of-pipe interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
7024
/* Privileged-register-fault interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
7029
/* Privileged-instruction-fault interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
7034
/* KIQ GENERIC2 interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};
7039
/*
 * Hook up the GFX interrupt sources (EOP, privileged register/instruction
 * faults, KIQ) to their callback tables and declare how many interrupt
 * types each source distinguishes.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	/* fault sources have a single undifferentiated type */
	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}
7054
/*
 * Install the RLC callback table.  NOTE(review): the iceland table is
 * used for every GFX v8 ASIC here, not just Iceland — presumably the RLC
 * interface is identical across the family; confirm against the table's
 * definition earlier in this file.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7059
/*
 * Initialize the GDS (global data share) size bookkeeping.  Total GDS
 * memory size is read from the hardware; GWS and OA totals are fixed.
 * Partition sizes for gfx vs. compute command submission are chosen
 * based on whether the ASIC reports a 64KB GDS.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic GDS info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
7087
7088 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7089                                                  u32 bitmap)
7090 {
7091         u32 data;
7092
7093         if (!bitmap)
7094                 return;
7095
7096         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7097         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7098
7099         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7100 }
7101
7102 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7103 {
7104         u32 data, mask;
7105
7106         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7107                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7108
7109         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7110
7111         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7112 }
7113
/*
 * Populate adev->gfx.cu_info: per-shader-array active-CU bitmaps, the
 * total active CU count, and the "ao" (presumably always-on) CU mask,
 * plus fixed per-CU capability constants.  Walks every shader engine /
 * shader array pair under grbm_idx_mutex since GRBM indexing is global
 * device state.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];	/* user disable masks: 4 SEs x 2 SHs */
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the always-on CU count at 2; dGPUs allow all CUs */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* point GRBM at this SE/SH before touching its regs */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* disable_masks only covers the first 4 SEs / 2 SHs */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; the first ao_cu_num of them
			 * become this SH's always-on set */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* pack one byte per SH into ao_cu_mask:
			 * SE in bits [31:16]/[15:0], SH in high/low byte */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* restore broadcast (all SE/SH) indexing */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
7169
/* GFX 8.0 IP-block descriptor, registered by the ASIC setup code. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7178
/* GFX 8.1 IP-block descriptor; shares all callbacks with 8.0. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7187
/*
 * Emit a WRITE_DATA packet that writes a zeroed CE (constant engine)
 * metadata payload to the ce_payload slot of the CSA in the GPU VA
 * space.  The payload layout (and therefore the packet's dword count)
 * differs depending on whether chained-IB support is active under
 * virtualization.
 */
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		/* payload dwords + 4 header dwords, minus the 2 not counted
		 * by the PACKET3 count field */
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	/* engine_sel = 2 (CE), dst_sel = 8, confirmed write */
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
7216
/*
 * Emit a WRITE_DATA packet that writes the DE (draw engine) metadata
 * payload to the de_payload slot of the CSA.  Unlike the CE payload,
 * the DE payload carries a GDS backup address, placed 4KB past the CSA
 * base.  Layout and dword count again depend on chained-IB support.
 */
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;	/* GDS backup lives one page in */
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		/* payload dwords + 4 header dwords, minus the 2 not counted
		 * by the PACKET3 count field */
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	/* engine_sel = 1 (DE), dst_sel = 8, confirmed write */
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}