drm/amdgpu: support per device powerplay enablement (v2)
[linux-block.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
31
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
34
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
40
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
44 #include "gca/gfx_8_0_enum.h"
45
46 #include "uvd/uvd_5_0_d.h"
47 #include "uvd/uvd_5_0_sh_mask.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
/*
 * Ring counts for this GFX8 block: one graphics ring and eight compute
 * rings. The code that consumes these constants is outside this excerpt.
 */
52 #define GFX8_NUM_GFX_RINGS     1
53 #define GFX8_NUM_COMPUTE_RINGS 8
54
/*
 * Golden GB_ADDR_CONFIG values per ASIC. These are the same values that
 * appear in the mmGB_ADDR_CONFIG rows of iceland_golden_common_all (used
 * for CHIP_TOPAZ), cz_golden_common_all and tonga_golden_common_all below.
 */
55 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
56 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
58
/*
 * Field helpers: shift a raw field value into its bit position.
 * The first five use the GB_TILE_MODE0 field shifts, the last four use
 * the GB_MACROTILE_MODE0 field shifts. No masking is applied, so the
 * caller must pass a value that fits the field.
 */
59 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
60 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
61 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
62 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
63 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
64 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
65 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
66 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
67 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
68
/*
 * Firmware images declared for each ASIC family handled here
 * (carrizo, stoney, tonga, topaz, fiji): ce, pfp, me, mec, mec2 and rlc.
 * Note that stoney lists no mec2 image.
 */
69 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
70 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
71 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
72 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
73 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
74 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
75
76 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
77 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
78 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
79 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
80 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
81
82 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
83 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
85 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
86 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
87 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
88
89 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
90 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
91 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
92 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
93 MODULE_FIRMWARE("amdgpu/topaz_mec2.bin");
94 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
95
96 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
97 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
99 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
100 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
101 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
102
/*
 * GDS register offsets, one row per VMID (0..15, indexed by VMID).
 * Each row lists, in order, that VMID's GDS BASE, GDS SIZE, GWS and OA
 * registers.
 */
103 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
104 {
105         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
106         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
107         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
108         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
109         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
110         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
111         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
112         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
113         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
114         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
115         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
116         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
117         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
118         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
119         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
120         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
121 };
122
/*
 * Tonga golden register settings, programmed for CHIP_TONGA by
 * gfx_v8_0_init_golden_registers() through
 * amdgpu_program_register_sequence().
 * Each row is a register offset followed by two 32-bit words —
 * presumably a bit mask and the value to apply under it; confirm against
 * amdgpu_program_register_sequence(). Do not reorder or edit values
 * without a hardware reference.
 */
123 static const u32 golden_settings_tonga_a11[] =
124 {
125         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
126         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
127         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
128         mmGB_GPU_ID, 0x0000000f, 0x00000000,
129         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
130         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
131         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
132         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
133         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
134         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
135         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
136         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
137         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
138         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
139         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
140 };
141
/*
 * Tonga common golden settings; the last of the three sequences
 * gfx_v8_0_init_golden_registers() programs for CHIP_TONGA.
 * Rows are (register, word, word) triplets — presumably mask and value;
 * confirm against amdgpu_program_register_sequence().
 * The mmGB_ADDR_CONFIG value equals TONGA_GB_ADDR_CONFIG_GOLDEN.
 */
142 static const u32 tonga_golden_common_all[] =
143 {
144         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
145         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
146         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
147         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
148         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
149         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
150         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
151         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
152 };
153
/*
 * Tonga MGCG/CGCG init sequence; the first of the three sequences
 * gfx_v8_0_init_golden_registers() programs for CHIP_TONGA.
 * Judging by the register names (CGTT_*, CGTS_*, RLC_CGCG_*), this is the
 * clock-gating setup — NOTE(review): confirm against hardware docs.
 * Rows are (register, word, word) triplets — presumably mask and value.
 * Per-CU CGTS rows cover CU0..CU7. mmGRBM_GFX_INDEX is written with
 * 0xe0000000 before each group of rows; presumably this selects
 * broadcast to all instances — TODO confirm.
 */
154 static const u32 tonga_mgcg_cgcg_init[] =
155 {
156         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
157         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
158         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
159         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
160         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
161         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
162         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
163         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
164         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
165         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
166         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
167         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
168         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
169         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
170         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
171         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
172         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
173         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
174         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
175         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
176         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
177         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
178         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
179         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
180         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
181         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
182         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
183         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
184         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
185         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
186         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
187         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
188         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
189         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
190         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
191         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
192         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
193         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
194         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
195         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
196         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
197         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
198         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
199         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
200         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
201         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
202         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
203         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
204         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
205         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
206         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
207         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
208         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
209         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
210         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
211         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
212         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
213         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
214         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
215         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
216         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
217         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
218         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
219         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
220         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
221         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
222         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
223         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
224         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
225         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
226         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
227         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
228         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
229         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
230         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
231 };
232
/*
 * Fiji common golden settings; the last of the three sequences
 * gfx_v8_0_init_golden_registers() programs for CHIP_FIJI.
 * Rows are (register, word, word) triplets — presumably mask and value;
 * confirm against amdgpu_program_register_sequence().
 * Unlike the other *_golden_common_all tables in this file, this one
 * ends with a second mmGRBM_GFX_INDEX write and an mmSPI_CONFIG_CNTL_1
 * row.
 */
233 static const u32 fiji_golden_common_all[] =
234 {
235         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
236         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
237         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
238         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
239         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
240         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
241         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
242         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
243         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
244         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
245 };
246
/*
 * Fiji golden register settings; the second of the three sequences
 * gfx_v8_0_init_golden_registers() programs for CHIP_FIJI.
 * Rows are (register, word, word) triplets — presumably mask and value;
 * confirm against amdgpu_program_register_sequence().
 */
247 static const u32 golden_settings_fiji_a10[] =
248 {
249         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
250         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
251         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
252         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
253         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
254         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
255         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
256         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
257         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
258         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
259         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
260 };
261
/*
 * Fiji MGCG/CGCG init sequence; the first of the three sequences
 * gfx_v8_0_init_golden_registers() programs for CHIP_FIJI.
 * Judging by the register names this is clock-gating setup —
 * NOTE(review): confirm against hardware docs.
 * Rows are (register, word, word) triplets — presumably mask and value.
 * Unlike the Tonga/Iceland/Carrizo variants, this table carries no
 * per-CU mmCGTS_CUn_* rows.
 */
262 static const u32 fiji_mgcg_cgcg_init[] =
263 {
264         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
265         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
266         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
267         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
268         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
269         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
270         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
271         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
272         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
273         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
274         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
275         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
276         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
277         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
278         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
279         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
280         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
281         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
282         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
283         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
284         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
285         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
286         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
287         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
288         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
289         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
290         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
291         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
292         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
293         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
294         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
295         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
296         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
297         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
298         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
299 };
300
/*
 * Iceland golden register settings; the second of the three sequences
 * gfx_v8_0_init_golden_registers() programs for CHIP_TOPAZ.
 * Rows are (register, word, word) triplets — presumably mask and value;
 * confirm against amdgpu_program_register_sequence().
 */
301 static const u32 golden_settings_iceland_a11[] =
302 {
303         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
304         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
305         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
306         mmGB_GPU_ID, 0x0000000f, 0x00000000,
307         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
308         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
309         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
310         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
311         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
312         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
313         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
314         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
315         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
316         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
317         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
318 };
319
/*
 * Iceland common golden settings; the last of the three sequences
 * gfx_v8_0_init_golden_registers() programs for CHIP_TOPAZ.
 * Rows are (register, word, word) triplets — presumably mask and value;
 * confirm against amdgpu_program_register_sequence().
 * The mmGB_ADDR_CONFIG value equals TOPAZ_GB_ADDR_CONFIG_GOLDEN.
 */
320 static const u32 iceland_golden_common_all[] =
321 {
322         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
323         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
324         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
325         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
326         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
327         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
328         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
329         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
330 };
331
/*
 * Iceland MGCG/CGCG init sequence; the first of the three sequences
 * gfx_v8_0_init_golden_registers() programs for CHIP_TOPAZ.
 * Judging by the register names this is clock-gating setup —
 * NOTE(review): confirm against hardware docs.
 * Rows are (register, word, word) triplets — presumably mask and value.
 * Per-CU CGTS rows cover CU0..CU5 only, and there is no
 * mmCP_MEM_SLP_CNTL row (the Tonga, Fiji and Carrizo tables end with one).
 */
332 static const u32 iceland_mgcg_cgcg_init[] =
333 {
334         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
335         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
336         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
337         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
338         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
339         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
340         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
341         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
342         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
343         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
344         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
345         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
346         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
347         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
348         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
349         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
350         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
351         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
352         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
353         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
354         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
355         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
356         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
357         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
358         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
359         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
360         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
361         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
362         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
363         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
364         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
365         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
366         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
367         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
368         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
369         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
370         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
371         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
372         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
373         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
374         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
375         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
376         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
377         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
378         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
379         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
380         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
381         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
382         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
383         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
384         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
385         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
386         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
387         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
388         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
389         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
390         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
391         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
392         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
393         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
394         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
395         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
396         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
397         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
398 };
399
/*
 * Carrizo golden register settings; the second of the three sequences
 * gfx_v8_0_init_golden_registers() programs for CHIP_CARRIZO.
 * Rows are (register, word, word) triplets — presumably mask and value;
 * confirm against amdgpu_program_register_sequence().
 */
400 static const u32 cz_golden_settings_a11[] =
401 {
402         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
403         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
404         mmGB_GPU_ID, 0x0000000f, 0x00000000,
405         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
406         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
407         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
408         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
409         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
410         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
411         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
412 };
413
/*
 * Carrizo common golden settings; the last of the three sequences
 * gfx_v8_0_init_golden_registers() programs for CHIP_CARRIZO.
 * Rows are (register, word, word) triplets — presumably mask and value;
 * confirm against amdgpu_program_register_sequence().
 * The mmGB_ADDR_CONFIG value equals CARRIZO_GB_ADDR_CONFIG_GOLDEN.
 */
414 static const u32 cz_golden_common_all[] =
415 {
416         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
417         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
418         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
419         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
420         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
421         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
422         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
423         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
424 };
425
/*
 * Carrizo MGCG/CGCG init sequence; the first of the three sequences
 * gfx_v8_0_init_golden_registers() programs for CHIP_CARRIZO.
 * Judging by the register names this is clock-gating setup —
 * NOTE(review): confirm against hardware docs.
 * Rows are (register, word, word) triplets — presumably mask and value.
 * Per-CU CGTS rows cover CU0..CU7. The mmRLC_CGCG_CGLS_CTRL value here is
 * 0x0020003f, whereas the Tonga/Fiji/Iceland tables use 0x0020003c.
 */
426 static const u32 cz_mgcg_cgcg_init[] =
427 {
428         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
429         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
430         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
431         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
432         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
433         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
434         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
435         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
436         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
437         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
439         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
446         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
447         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
448         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
449         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
450         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
452         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
453         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
454         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
455         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
456         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
457         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
458         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
459         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
460         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
461         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
462         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
463         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
464         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
465         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
466         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
467         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
468         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
469         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
470         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
471         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
472         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
473         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
474         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
475         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
476         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
477         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
478         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
479         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
480         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
481         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
482         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
483         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
484         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
485         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
486         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
487         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
488         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
489         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
490         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
491         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
492         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
493         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
494         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
495         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
496         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
497         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
498         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
499         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
500         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
501         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
502         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
503 };
504
/*
 * Stoney golden register settings; the second of the three sequences
 * gfx_v8_0_init_golden_registers() programs for CHIP_STONEY.
 * Rows are (register, word, word) triplets — presumably mask and value;
 * confirm against amdgpu_program_register_sequence().
 */
505 static const u32 stoney_golden_settings_a11[] =
506 {
507         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
508         mmGB_GPU_ID, 0x0000000f, 0x00000000,
509         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
510         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
511         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
512         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
513         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
514         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
515         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
516         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
517 };
518
/*
 * Stoney common golden settings; the last of the three sequences
 * gfx_v8_0_init_golden_registers() programs for CHIP_STONEY.
 * Rows are (register, word, word) triplets — presumably mask and value;
 * confirm against amdgpu_program_register_sequence().
 * Note the mmGB_ADDR_CONFIG value (0x12010001) differs from the
 * 0x22010001 used by the Iceland and Carrizo tables.
 */
519 static const u32 stoney_golden_common_all[] =
520 {
521         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
522         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
523         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
524         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
525         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
526         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
527         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
528         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
529 };
530
/*
 * Stoney MGCG/CGCG init sequence; the first of the three sequences
 * gfx_v8_0_init_golden_registers() programs for CHIP_STONEY.
 * Much shorter than the other *_mgcg_cgcg_init tables: no
 * mmRLC_CGTT_MGCG_OVERRIDE row and no per-block CGTT/CGTS rows.
 * Rows are (register, word, word) triplets — presumably mask and value;
 * confirm against amdgpu_program_register_sequence().
 */
531 static const u32 stoney_mgcg_cgcg_init[] =
532 {
533         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
534         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
535         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
536         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
537         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
538         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
539 };
540
/*
 * Forward declarations of file-local helpers; their definitions are not
 * part of this excerpt and, being static, must appear later in this file.
 */
541 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
542 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
543 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
544
545 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
546 {
547         switch (adev->asic_type) {
548         case CHIP_TOPAZ:
549                 amdgpu_program_register_sequence(adev,
550                                                  iceland_mgcg_cgcg_init,
551                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
552                 amdgpu_program_register_sequence(adev,
553                                                  golden_settings_iceland_a11,
554                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
555                 amdgpu_program_register_sequence(adev,
556                                                  iceland_golden_common_all,
557                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
558                 break;
559         case CHIP_FIJI:
560                 amdgpu_program_register_sequence(adev,
561                                                  fiji_mgcg_cgcg_init,
562                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
563                 amdgpu_program_register_sequence(adev,
564                                                  golden_settings_fiji_a10,
565                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
566                 amdgpu_program_register_sequence(adev,
567                                                  fiji_golden_common_all,
568                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
569                 break;
570
571         case CHIP_TONGA:
572                 amdgpu_program_register_sequence(adev,
573                                                  tonga_mgcg_cgcg_init,
574                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
575                 amdgpu_program_register_sequence(adev,
576                                                  golden_settings_tonga_a11,
577                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
578                 amdgpu_program_register_sequence(adev,
579                                                  tonga_golden_common_all,
580                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
581                 break;
582         case CHIP_CARRIZO:
583                 amdgpu_program_register_sequence(adev,
584                                                  cz_mgcg_cgcg_init,
585                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
586                 amdgpu_program_register_sequence(adev,
587                                                  cz_golden_settings_a11,
588                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
589                 amdgpu_program_register_sequence(adev,
590                                                  cz_golden_common_all,
591                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
592                 break;
593         case CHIP_STONEY:
594                 amdgpu_program_register_sequence(adev,
595                                                  stoney_mgcg_cgcg_init,
596                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
597                 amdgpu_program_register_sequence(adev,
598                                                  stoney_golden_settings_a11,
599                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
600                 amdgpu_program_register_sequence(adev,
601                                                  stoney_golden_common_all,
602                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
603                 break;
604         default:
605                 break;
606         }
607 }
608
609 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
610 {
611         int i;
612
613         adev->gfx.scratch.num_reg = 7;
614         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
615         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
616                 adev->gfx.scratch.free[i] = true;
617                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
618         }
619 }
620
621 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
622 {
623         struct amdgpu_device *adev = ring->adev;
624         uint32_t scratch;
625         uint32_t tmp = 0;
626         unsigned i;
627         int r;
628
629         r = amdgpu_gfx_scratch_get(adev, &scratch);
630         if (r) {
631                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
632                 return r;
633         }
634         WREG32(scratch, 0xCAFEDEAD);
635         r = amdgpu_ring_lock(ring, 3);
636         if (r) {
637                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
638                           ring->idx, r);
639                 amdgpu_gfx_scratch_free(adev, scratch);
640                 return r;
641         }
642         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
643         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
644         amdgpu_ring_write(ring, 0xDEADBEEF);
645         amdgpu_ring_unlock_commit(ring);
646
647         for (i = 0; i < adev->usec_timeout; i++) {
648                 tmp = RREG32(scratch);
649                 if (tmp == 0xDEADBEEF)
650                         break;
651                 DRM_UDELAY(1);
652         }
653         if (i < adev->usec_timeout) {
654                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
655                          ring->idx, i);
656         } else {
657                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
658                           ring->idx, scratch, tmp);
659                 r = -EINVAL;
660         }
661         amdgpu_gfx_scratch_free(adev, scratch);
662         return r;
663 }
664
665 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
666 {
667         struct amdgpu_device *adev = ring->adev;
668         struct amdgpu_ib ib;
669         struct fence *f = NULL;
670         uint32_t scratch;
671         uint32_t tmp = 0;
672         unsigned i;
673         int r;
674
675         r = amdgpu_gfx_scratch_get(adev, &scratch);
676         if (r) {
677                 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
678                 return r;
679         }
680         WREG32(scratch, 0xCAFEDEAD);
681         memset(&ib, 0, sizeof(ib));
682         r = amdgpu_ib_get(ring, NULL, 256, &ib);
683         if (r) {
684                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
685                 goto err1;
686         }
687         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
688         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
689         ib.ptr[2] = 0xDEADBEEF;
690         ib.length_dw = 3;
691
692         r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
693                                                  AMDGPU_FENCE_OWNER_UNDEFINED,
694                                                  &f);
695         if (r)
696                 goto err2;
697
698         r = fence_wait(f, false);
699         if (r) {
700                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
701                 goto err2;
702         }
703         for (i = 0; i < adev->usec_timeout; i++) {
704                 tmp = RREG32(scratch);
705                 if (tmp == 0xDEADBEEF)
706                         break;
707                 DRM_UDELAY(1);
708         }
709         if (i < adev->usec_timeout) {
710                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
711                          ring->idx, i);
712                 goto err2;
713         } else {
714                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
715                           scratch, tmp);
716                 r = -EINVAL;
717         }
718 err2:
719         fence_put(f);
720         amdgpu_ib_free(adev, &ib);
721 err1:
722         amdgpu_gfx_scratch_free(adev, scratch);
723         return r;
724 }
725
726 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
727 {
728         const char *chip_name;
729         char fw_name[30];
730         int err;
731         struct amdgpu_firmware_info *info = NULL;
732         const struct common_firmware_header *header = NULL;
733         const struct gfx_firmware_header_v1_0 *cp_hdr;
734
735         DRM_DEBUG("\n");
736
737         switch (adev->asic_type) {
738         case CHIP_TOPAZ:
739                 chip_name = "topaz";
740                 break;
741         case CHIP_TONGA:
742                 chip_name = "tonga";
743                 break;
744         case CHIP_CARRIZO:
745                 chip_name = "carrizo";
746                 break;
747         case CHIP_FIJI:
748                 chip_name = "fiji";
749                 break;
750         case CHIP_STONEY:
751                 chip_name = "stoney";
752                 break;
753         default:
754                 BUG();
755         }
756
757         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
758         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
759         if (err)
760                 goto out;
761         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
762         if (err)
763                 goto out;
764         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
765         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
766         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
767
768         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
769         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
770         if (err)
771                 goto out;
772         err = amdgpu_ucode_validate(adev->gfx.me_fw);
773         if (err)
774                 goto out;
775         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
776         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
777         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
778
779         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
780         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
781         if (err)
782                 goto out;
783         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
784         if (err)
785                 goto out;
786         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
787         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
788         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
789
790         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
791         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
792         if (err)
793                 goto out;
794         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
795         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
796         adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
797         adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
798
799         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
800         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
801         if (err)
802                 goto out;
803         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
804         if (err)
805                 goto out;
806         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
807         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
808         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
809
810         if (adev->asic_type != CHIP_STONEY) {
811                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
812                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
813                 if (!err) {
814                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
815                         if (err)
816                                 goto out;
817                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
818                                 adev->gfx.mec2_fw->data;
819                         adev->gfx.mec2_fw_version =
820                                 le32_to_cpu(cp_hdr->header.ucode_version);
821                         adev->gfx.mec2_feature_version =
822                                 le32_to_cpu(cp_hdr->ucode_feature_version);
823                 } else {
824                         err = 0;
825                         adev->gfx.mec2_fw = NULL;
826                 }
827         }
828
829         if (adev->firmware.smu_load) {
830                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
831                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
832                 info->fw = adev->gfx.pfp_fw;
833                 header = (const struct common_firmware_header *)info->fw->data;
834                 adev->firmware.fw_size +=
835                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
836
837                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
838                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
839                 info->fw = adev->gfx.me_fw;
840                 header = (const struct common_firmware_header *)info->fw->data;
841                 adev->firmware.fw_size +=
842                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
843
844                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
845                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
846                 info->fw = adev->gfx.ce_fw;
847                 header = (const struct common_firmware_header *)info->fw->data;
848                 adev->firmware.fw_size +=
849                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
850
851                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
852                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
853                 info->fw = adev->gfx.rlc_fw;
854                 header = (const struct common_firmware_header *)info->fw->data;
855                 adev->firmware.fw_size +=
856                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
857
858                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
859                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
860                 info->fw = adev->gfx.mec_fw;
861                 header = (const struct common_firmware_header *)info->fw->data;
862                 adev->firmware.fw_size +=
863                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
864
865                 if (adev->gfx.mec2_fw) {
866                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
867                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
868                         info->fw = adev->gfx.mec2_fw;
869                         header = (const struct common_firmware_header *)info->fw->data;
870                         adev->firmware.fw_size +=
871                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
872                 }
873
874         }
875
876 out:
877         if (err) {
878                 dev_err(adev->dev,
879                         "gfx8: Failed to load firmware \"%s\"\n",
880                         fw_name);
881                 release_firmware(adev->gfx.pfp_fw);
882                 adev->gfx.pfp_fw = NULL;
883                 release_firmware(adev->gfx.me_fw);
884                 adev->gfx.me_fw = NULL;
885                 release_firmware(adev->gfx.ce_fw);
886                 adev->gfx.ce_fw = NULL;
887                 release_firmware(adev->gfx.rlc_fw);
888                 adev->gfx.rlc_fw = NULL;
889                 release_firmware(adev->gfx.mec_fw);
890                 adev->gfx.mec_fw = NULL;
891                 release_firmware(adev->gfx.mec2_fw);
892                 adev->gfx.mec2_fw = NULL;
893         }
894         return err;
895 }
896
897 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
898 {
899         int r;
900
901         if (adev->gfx.mec.hpd_eop_obj) {
902                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
903                 if (unlikely(r != 0))
904                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
905                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
906                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
907
908                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
909                 adev->gfx.mec.hpd_eop_obj = NULL;
910         }
911 }
912
/*
 * Size unit, in bytes, for the MEC HPD EOP buffer: gfx_v8_0_mec_init()
 * creates and clears num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes.
 */
#define MEC_HPD_SIZE 2048
914
915 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
916 {
917         int r;
918         u32 *hpd;
919
920         /*
921          * we assign only 1 pipe because all other pipes will
922          * be handled by KFD
923          */
924         adev->gfx.mec.num_mec = 1;
925         adev->gfx.mec.num_pipe = 1;
926         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
927
928         if (adev->gfx.mec.hpd_eop_obj == NULL) {
929                 r = amdgpu_bo_create(adev,
930                                      adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
931                                      PAGE_SIZE, true,
932                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
933                                      &adev->gfx.mec.hpd_eop_obj);
934                 if (r) {
935                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
936                         return r;
937                 }
938         }
939
940         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
941         if (unlikely(r != 0)) {
942                 gfx_v8_0_mec_fini(adev);
943                 return r;
944         }
945         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
946                           &adev->gfx.mec.hpd_eop_gpu_addr);
947         if (r) {
948                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
949                 gfx_v8_0_mec_fini(adev);
950                 return r;
951         }
952         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
953         if (r) {
954                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
955                 gfx_v8_0_mec_fini(adev);
956                 return r;
957         }
958
959         memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
960
961         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
962         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
963
964         return 0;
965 }
966
/*
 * Raw compute shader words for the VGPR pass of
 * gfx_v8_0_do_edc_gpr_workarounds().  That function copies them into its
 * indirect buffer at a 256-byte aligned offset and programs the resulting
 * address (shifted right by 8) into mmCOMPUTE_PGM_LO/HI before the first
 * dispatch.
 * NOTE(review): presumably machine code that writes every VGPR - confirm
 * against the ISA documentation before editing any word.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1003
/*
 * Raw compute shader words shared by the SGPR1 and SGPR2 passes of
 * gfx_v8_0_do_edc_gpr_workarounds().  They are copied into the indirect
 * buffer right after the (256-byte padded) VGPR shader, and both SGPR
 * dispatches point mmCOMPUTE_PGM_LO/HI at that copy.
 * NOTE(review): presumably machine code that writes the SGPRs - confirm
 * against the ISA documentation before editing any word.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1028
/*
 * Compute state for the VGPR pass of gfx_v8_0_do_edc_gpr_workarounds(),
 * stored as flat (register, value) pairs.  Each pair is emitted as one
 * SET_SH_REG packet, so the array length must stay even.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1048
/*
 * Compute state for the first SGPR pass of
 * gfx_v8_0_do_edc_gpr_workarounds(), stored as flat (register, value)
 * pairs.  Differs from sgpr2_init_regs only in the
 * mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value (0x0f here, 0xf0 there).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1068
/*
 * Compute state for the second SGPR pass of
 * gfx_v8_0_do_edc_gpr_workarounds(), stored as flat (register, value)
 * pairs.  Differs from sgpr1_init_regs only in the
 * mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value (0xf0 here, 0x0f there).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1088
/*
 * EDC counter registers that gfx_v8_0_do_edc_gpr_workarounds() reads back
 * (discarding the values) at the end of the workaround in order to clear
 * the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1117
1118 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1119 {
1120         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1121         struct amdgpu_ib ib;
1122         struct fence *f = NULL;
1123         int r, i;
1124         u32 tmp;
1125         unsigned total_size, vgpr_offset, sgpr_offset;
1126         u64 gpu_addr;
1127
1128         /* only supported on CZ */
1129         if (adev->asic_type != CHIP_CARRIZO)
1130                 return 0;
1131
1132         /* bail if the compute ring is not ready */
1133         if (!ring->ready)
1134                 return 0;
1135
1136         tmp = RREG32(mmGB_EDC_MODE);
1137         WREG32(mmGB_EDC_MODE, 0);
1138
1139         total_size =
1140                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1141         total_size +=
1142                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1143         total_size +=
1144                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1145         total_size = ALIGN(total_size, 256);
1146         vgpr_offset = total_size;
1147         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1148         sgpr_offset = total_size;
1149         total_size += sizeof(sgpr_init_compute_shader);
1150
1151         /* allocate an indirect buffer to put the commands in */
1152         memset(&ib, 0, sizeof(ib));
1153         r = amdgpu_ib_get(ring, NULL, total_size, &ib);
1154         if (r) {
1155                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1156                 return r;
1157         }
1158
1159         /* load the compute shaders */
1160         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1161                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1162
1163         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1164                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1165
1166         /* init the ib length to 0 */
1167         ib.length_dw = 0;
1168
1169         /* VGPR */
1170         /* write the register state for the compute dispatch */
1171         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1172                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1173                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1174                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1175         }
1176         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1177         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1178         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1179         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1180         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1181         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1182
1183         /* write dispatch packet */
1184         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1185         ib.ptr[ib.length_dw++] = 8; /* x */
1186         ib.ptr[ib.length_dw++] = 1; /* y */
1187         ib.ptr[ib.length_dw++] = 1; /* z */
1188         ib.ptr[ib.length_dw++] =
1189                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1190
1191         /* write CS partial flush packet */
1192         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1193         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1194
1195         /* SGPR1 */
1196         /* write the register state for the compute dispatch */
1197         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1198                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1199                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1200                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1201         }
1202         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1203         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1204         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1205         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1206         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1207         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1208
1209         /* write dispatch packet */
1210         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1211         ib.ptr[ib.length_dw++] = 8; /* x */
1212         ib.ptr[ib.length_dw++] = 1; /* y */
1213         ib.ptr[ib.length_dw++] = 1; /* z */
1214         ib.ptr[ib.length_dw++] =
1215                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1216
1217         /* write CS partial flush packet */
1218         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1219         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1220
1221         /* SGPR2 */
1222         /* write the register state for the compute dispatch */
1223         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1224                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1225                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1226                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1227         }
1228         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1229         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1230         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1231         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1232         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1233         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1234
1235         /* write dispatch packet */
1236         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1237         ib.ptr[ib.length_dw++] = 8; /* x */
1238         ib.ptr[ib.length_dw++] = 1; /* y */
1239         ib.ptr[ib.length_dw++] = 1; /* z */
1240         ib.ptr[ib.length_dw++] =
1241                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1242
1243         /* write CS partial flush packet */
1244         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1245         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1246
1247         /* shedule the ib on the ring */
1248         r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
1249                                                  AMDGPU_FENCE_OWNER_UNDEFINED,
1250                                                  &f);
1251         if (r) {
1252                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1253                 goto fail;
1254         }
1255
1256         /* wait for the GPU to finish processing the IB */
1257         r = fence_wait(f, false);
1258         if (r) {
1259                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1260                 goto fail;
1261         }
1262
1263         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1264         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1265         WREG32(mmGB_EDC_MODE, tmp);
1266
1267         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1268         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1269         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1270
1271
1272         /* read back registers to clear the counters */
1273         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1274                 RREG32(sec_ded_counter_registers[i]);
1275
1276 fail:
1277         fence_put(f);
1278         amdgpu_ib_free(adev, &ib);
1279
1280         return r;
1281 }
1282
1283 static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1284 {
1285         u32 gb_addr_config;
1286         u32 mc_shared_chmap, mc_arb_ramcfg;
1287         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1288         u32 tmp;
1289
1290         switch (adev->asic_type) {
1291         case CHIP_TOPAZ:
1292                 adev->gfx.config.max_shader_engines = 1;
1293                 adev->gfx.config.max_tile_pipes = 2;
1294                 adev->gfx.config.max_cu_per_sh = 6;
1295                 adev->gfx.config.max_sh_per_se = 1;
1296                 adev->gfx.config.max_backends_per_se = 2;
1297                 adev->gfx.config.max_texture_channel_caches = 2;
1298                 adev->gfx.config.max_gprs = 256;
1299                 adev->gfx.config.max_gs_threads = 32;
1300                 adev->gfx.config.max_hw_contexts = 8;
1301
1302                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1303                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1304                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1305                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1306                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1307                 break;
1308         case CHIP_FIJI:
1309                 adev->gfx.config.max_shader_engines = 4;
1310                 adev->gfx.config.max_tile_pipes = 16;
1311                 adev->gfx.config.max_cu_per_sh = 16;
1312                 adev->gfx.config.max_sh_per_se = 1;
1313                 adev->gfx.config.max_backends_per_se = 4;
1314                 adev->gfx.config.max_texture_channel_caches = 16;
1315                 adev->gfx.config.max_gprs = 256;
1316                 adev->gfx.config.max_gs_threads = 32;
1317                 adev->gfx.config.max_hw_contexts = 8;
1318
1319                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1320                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1321                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1322                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1323                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1324                 break;
1325         case CHIP_TONGA:
1326                 adev->gfx.config.max_shader_engines = 4;
1327                 adev->gfx.config.max_tile_pipes = 8;
1328                 adev->gfx.config.max_cu_per_sh = 8;
1329                 adev->gfx.config.max_sh_per_se = 1;
1330                 adev->gfx.config.max_backends_per_se = 2;
1331                 adev->gfx.config.max_texture_channel_caches = 8;
1332                 adev->gfx.config.max_gprs = 256;
1333                 adev->gfx.config.max_gs_threads = 32;
1334                 adev->gfx.config.max_hw_contexts = 8;
1335
1336                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1337                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1338                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1339                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1340                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1341                 break;
1342         case CHIP_CARRIZO:
1343                 adev->gfx.config.max_shader_engines = 1;
1344                 adev->gfx.config.max_tile_pipes = 2;
1345                 adev->gfx.config.max_sh_per_se = 1;
1346                 adev->gfx.config.max_backends_per_se = 2;
1347
1348                 switch (adev->pdev->revision) {
1349                 case 0xc4:
1350                 case 0x84:
1351                 case 0xc8:
1352                 case 0xcc:
1353                 case 0xe1:
1354                 case 0xe3:
1355                         /* B10 */
1356                         adev->gfx.config.max_cu_per_sh = 8;
1357                         break;
1358                 case 0xc5:
1359                 case 0x81:
1360                 case 0x85:
1361                 case 0xc9:
1362                 case 0xcd:
1363                 case 0xe2:
1364                 case 0xe4:
1365                         /* B8 */
1366                         adev->gfx.config.max_cu_per_sh = 6;
1367                         break;
1368                 case 0xc6:
1369                 case 0xca:
1370                 case 0xce:
1371                 case 0x88:
1372                         /* B6 */
1373                         adev->gfx.config.max_cu_per_sh = 6;
1374                         break;
1375                 case 0xc7:
1376                 case 0x87:
1377                 case 0xcb:
1378                 case 0xe5:
1379                 case 0x89:
1380                 default:
1381                         /* B4 */
1382                         adev->gfx.config.max_cu_per_sh = 4;
1383                         break;
1384                 }
1385
1386                 adev->gfx.config.max_texture_channel_caches = 2;
1387                 adev->gfx.config.max_gprs = 256;
1388                 adev->gfx.config.max_gs_threads = 32;
1389                 adev->gfx.config.max_hw_contexts = 8;
1390
1391                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1392                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1393                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1394                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1395                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1396                 break;
1397         case CHIP_STONEY:
1398                 adev->gfx.config.max_shader_engines = 1;
1399                 adev->gfx.config.max_tile_pipes = 2;
1400                 adev->gfx.config.max_sh_per_se = 1;
1401                 adev->gfx.config.max_backends_per_se = 1;
1402
1403                 switch (adev->pdev->revision) {
1404                 case 0xc0:
1405                 case 0xc1:
1406                 case 0xc2:
1407                 case 0xc4:
1408                 case 0xc8:
1409                 case 0xc9:
1410                         adev->gfx.config.max_cu_per_sh = 3;
1411                         break;
1412                 case 0xd0:
1413                 case 0xd1:
1414                 case 0xd2:
1415                 default:
1416                         adev->gfx.config.max_cu_per_sh = 2;
1417                         break;
1418                 }
1419
1420                 adev->gfx.config.max_texture_channel_caches = 2;
1421                 adev->gfx.config.max_gprs = 256;
1422                 adev->gfx.config.max_gs_threads = 16;
1423                 adev->gfx.config.max_hw_contexts = 8;
1424
1425                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1426                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1427                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1428                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1429                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1430                 break;
1431         default:
1432                 adev->gfx.config.max_shader_engines = 2;
1433                 adev->gfx.config.max_tile_pipes = 4;
1434                 adev->gfx.config.max_cu_per_sh = 2;
1435                 adev->gfx.config.max_sh_per_se = 1;
1436                 adev->gfx.config.max_backends_per_se = 2;
1437                 adev->gfx.config.max_texture_channel_caches = 4;
1438                 adev->gfx.config.max_gprs = 256;
1439                 adev->gfx.config.max_gs_threads = 32;
1440                 adev->gfx.config.max_hw_contexts = 8;
1441
1442                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1443                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1444                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1445                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1446                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1447                 break;
1448         }
1449
1450         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1451         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1452         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1453
1454         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1455         adev->gfx.config.mem_max_burst_length_bytes = 256;
1456         if (adev->flags & AMD_IS_APU) {
1457                 /* Get memory bank mapping mode. */
1458                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1459                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1460                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1461
1462                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1463                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1464                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1465
1466                 /* Validate settings in case only one DIMM installed. */
1467                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1468                         dimm00_addr_map = 0;
1469                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1470                         dimm01_addr_map = 0;
1471                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1472                         dimm10_addr_map = 0;
1473                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1474                         dimm11_addr_map = 0;
1475
1476                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1477                 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1478                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1479                         adev->gfx.config.mem_row_size_in_kb = 2;
1480                 else
1481                         adev->gfx.config.mem_row_size_in_kb = 1;
1482         } else {
1483                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1484                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1485                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1486                         adev->gfx.config.mem_row_size_in_kb = 4;
1487         }
1488
1489         adev->gfx.config.shader_engine_tile_size = 32;
1490         adev->gfx.config.num_gpus = 1;
1491         adev->gfx.config.multi_gpu_tile_size = 64;
1492
1493         /* fix up row size */
1494         switch (adev->gfx.config.mem_row_size_in_kb) {
1495         case 1:
1496         default:
1497                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1498                 break;
1499         case 2:
1500                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1501                 break;
1502         case 4:
1503                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1504                 break;
1505         }
1506         adev->gfx.config.gb_addr_config = gb_addr_config;
1507 }
1508
1509 static int gfx_v8_0_sw_init(void *handle)
1510 {
1511         int i, r;
1512         struct amdgpu_ring *ring;
1513         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1514
1515         /* EOP Event */
1516         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1517         if (r)
1518                 return r;
1519
1520         /* Privileged reg */
1521         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1522         if (r)
1523                 return r;
1524
1525         /* Privileged inst */
1526         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1527         if (r)
1528                 return r;
1529
1530         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1531
1532         gfx_v8_0_scratch_init(adev);
1533
1534         r = gfx_v8_0_init_microcode(adev);
1535         if (r) {
1536                 DRM_ERROR("Failed to load gfx firmware!\n");
1537                 return r;
1538         }
1539
1540         r = gfx_v8_0_mec_init(adev);
1541         if (r) {
1542                 DRM_ERROR("Failed to init MEC BOs!\n");
1543                 return r;
1544         }
1545
1546         /* set up the gfx ring */
1547         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1548                 ring = &adev->gfx.gfx_ring[i];
1549                 ring->ring_obj = NULL;
1550                 sprintf(ring->name, "gfx");
1551                 /* no gfx doorbells on iceland */
1552                 if (adev->asic_type != CHIP_TOPAZ) {
1553                         ring->use_doorbell = true;
1554                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1555                 }
1556
1557                 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1558                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1559                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1560                                      AMDGPU_RING_TYPE_GFX);
1561                 if (r)
1562                         return r;
1563         }
1564
1565         /* set up the compute queues */
1566         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1567                 unsigned irq_type;
1568
1569                 /* max 32 queues per MEC */
1570                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1571                         DRM_ERROR("Too many (%d) compute rings!\n", i);
1572                         break;
1573                 }
1574                 ring = &adev->gfx.compute_ring[i];
1575                 ring->ring_obj = NULL;
1576                 ring->use_doorbell = true;
1577                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1578                 ring->me = 1; /* first MEC */
1579                 ring->pipe = i / 8;
1580                 ring->queue = i % 8;
1581                 sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
1582                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1583                 /* type-2 packets are deprecated on MEC, use type-3 instead */
1584                 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1585                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1586                                      &adev->gfx.eop_irq, irq_type,
1587                                      AMDGPU_RING_TYPE_COMPUTE);
1588                 if (r)
1589                         return r;
1590         }
1591
1592         /* reserve GDS, GWS and OA resource for gfx */
1593         r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1594                         PAGE_SIZE, true,
1595                         AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1596                         NULL, &adev->gds.gds_gfx_bo);
1597         if (r)
1598                 return r;
1599
1600         r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1601                 PAGE_SIZE, true,
1602                 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1603                 NULL, &adev->gds.gws_gfx_bo);
1604         if (r)
1605                 return r;
1606
1607         r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1608                         PAGE_SIZE, true,
1609                         AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1610                         NULL, &adev->gds.oa_gfx_bo);
1611         if (r)
1612                 return r;
1613
1614         adev->gfx.ce_ram_size = 0x8000;
1615
1616         gfx_v8_0_gpu_early_init(adev);
1617
1618         return 0;
1619 }
1620
1621 static int gfx_v8_0_sw_fini(void *handle)
1622 {
1623         int i;
1624         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1625
1626         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1627         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1628         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1629
1630         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1631                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1632         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1633                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1634
1635         gfx_v8_0_mec_fini(adev);
1636
1637         return 0;
1638 }
1639
1640 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1641 {
1642         uint32_t *modearray, *mod2array;
1643         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1644         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1645         u32 reg_offset;
1646
1647         modearray = adev->gfx.config.tile_mode_array;
1648         mod2array = adev->gfx.config.macrotile_mode_array;
1649
1650         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1651                 modearray[reg_offset] = 0;
1652
1653         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
1654                 mod2array[reg_offset] = 0;
1655
1656         switch (adev->asic_type) {
1657         case CHIP_TOPAZ:
1658                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1659                                 PIPE_CONFIG(ADDR_SURF_P2) |
1660                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1661                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1662                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1663                                 PIPE_CONFIG(ADDR_SURF_P2) |
1664                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1665                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1666                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1667                                 PIPE_CONFIG(ADDR_SURF_P2) |
1668                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1669                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1670                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1671                                 PIPE_CONFIG(ADDR_SURF_P2) |
1672                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1673                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1674                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1675                                 PIPE_CONFIG(ADDR_SURF_P2) |
1676                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1677                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1678                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1679                                 PIPE_CONFIG(ADDR_SURF_P2) |
1680                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1681                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1682                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1683                                 PIPE_CONFIG(ADDR_SURF_P2) |
1684                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1685                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1686                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1687                                 PIPE_CONFIG(ADDR_SURF_P2));
1688                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1689                                 PIPE_CONFIG(ADDR_SURF_P2) |
1690                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1691                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1692                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1693                                  PIPE_CONFIG(ADDR_SURF_P2) |
1694                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1695                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1696                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1697                                  PIPE_CONFIG(ADDR_SURF_P2) |
1698                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1699                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1700                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1701                                  PIPE_CONFIG(ADDR_SURF_P2) |
1702                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1703                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1704                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1705                                  PIPE_CONFIG(ADDR_SURF_P2) |
1706                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1707                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1708                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1709                                  PIPE_CONFIG(ADDR_SURF_P2) |
1710                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1711                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1712                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1713                                  PIPE_CONFIG(ADDR_SURF_P2) |
1714                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1715                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1716                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1717                                  PIPE_CONFIG(ADDR_SURF_P2) |
1718                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1719                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1720                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1721                                  PIPE_CONFIG(ADDR_SURF_P2) |
1722                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1723                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1724                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1725                                  PIPE_CONFIG(ADDR_SURF_P2) |
1726                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1727                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1728                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1729                                  PIPE_CONFIG(ADDR_SURF_P2) |
1730                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1731                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1732                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1733                                  PIPE_CONFIG(ADDR_SURF_P2) |
1734                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1735                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1736                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1737                                  PIPE_CONFIG(ADDR_SURF_P2) |
1738                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1739                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1740                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1741                                  PIPE_CONFIG(ADDR_SURF_P2) |
1742                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1743                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1744                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1745                                  PIPE_CONFIG(ADDR_SURF_P2) |
1746                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1747                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1748                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1749                                  PIPE_CONFIG(ADDR_SURF_P2) |
1750                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1751                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1752                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1753                                  PIPE_CONFIG(ADDR_SURF_P2) |
1754                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1755                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1756                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1757                                  PIPE_CONFIG(ADDR_SURF_P2) |
1758                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1759                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1760
1761                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1762                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1763                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1764                                 NUM_BANKS(ADDR_SURF_8_BANK));
1765                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1766                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1767                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1768                                 NUM_BANKS(ADDR_SURF_8_BANK));
1769                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1770                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1771                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1772                                 NUM_BANKS(ADDR_SURF_8_BANK));
1773                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1774                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1775                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1776                                 NUM_BANKS(ADDR_SURF_8_BANK));
1777                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1778                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1779                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1780                                 NUM_BANKS(ADDR_SURF_8_BANK));
1781                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1782                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1783                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1784                                 NUM_BANKS(ADDR_SURF_8_BANK));
1785                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1786                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1787                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1788                                 NUM_BANKS(ADDR_SURF_8_BANK));
1789                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1790                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1791                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1792                                 NUM_BANKS(ADDR_SURF_16_BANK));
1793                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1794                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1795                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1796                                 NUM_BANKS(ADDR_SURF_16_BANK));
1797                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1798                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1799                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1800                                  NUM_BANKS(ADDR_SURF_16_BANK));
1801                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1802                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1803                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1804                                  NUM_BANKS(ADDR_SURF_16_BANK));
1805                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1806                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1807                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1808                                  NUM_BANKS(ADDR_SURF_16_BANK));
1809                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1810                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1811                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1812                                  NUM_BANKS(ADDR_SURF_16_BANK));
1813                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1814                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1815                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1816                                  NUM_BANKS(ADDR_SURF_8_BANK));
1817
1818                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1819                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1820                             reg_offset != 23)
1821                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1822
1823                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1824                         if (reg_offset != 7)
1825                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1826
1827                 break;
1828         case CHIP_FIJI:
1829                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1830                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1831                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1832                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1833                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1834                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1835                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1836                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1837                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1838                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1839                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1840                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1841                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1842                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1843                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1844                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1845                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1846                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1847                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1848                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1849                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1850                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1851                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1852                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1853                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1854                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1855                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1856                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1857                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1858                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1859                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1860                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1861                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1862                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1863                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1864                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1865                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1866                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1867                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1868                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1869                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1870                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1871                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1872                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1873                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1874                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1875                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1876                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1877                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1878                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1879                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1880                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1881                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1882                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1883                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1884                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1885                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1886                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1887                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1888                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1889                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1890                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1891                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1892                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1893                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1894                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1895                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1896                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1897                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1898                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1899                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1900                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1901                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1902                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1903                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1904                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1905                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1906                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1907                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1908                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1909                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1910                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1911                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1912                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1913                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1914                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1915                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1916                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1917                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1918                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1919                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1920                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1921                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1922                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1923                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1924                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1925                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1926                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1927                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1928                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1929                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1930                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1931                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1932                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1933                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1934                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1935                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1936                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1937                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1938                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1939                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1940                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1941                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1942                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1943                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1944                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1945                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1946                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1947                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1948                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1949                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1950                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1951
1952                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1953                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1954                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1955                                 NUM_BANKS(ADDR_SURF_8_BANK));
1956                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1957                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1958                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1959                                 NUM_BANKS(ADDR_SURF_8_BANK));
1960                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1961                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1962                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1963                                 NUM_BANKS(ADDR_SURF_8_BANK));
1964                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1965                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1966                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1967                                 NUM_BANKS(ADDR_SURF_8_BANK));
1968                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1969                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1970                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1971                                 NUM_BANKS(ADDR_SURF_8_BANK));
1972                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1973                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1974                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1975                                 NUM_BANKS(ADDR_SURF_8_BANK));
1976                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1977                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1978                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1979                                 NUM_BANKS(ADDR_SURF_8_BANK));
1980                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1981                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1982                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1983                                 NUM_BANKS(ADDR_SURF_8_BANK));
1984                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1985                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1986                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1987                                 NUM_BANKS(ADDR_SURF_8_BANK));
1988                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1989                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1990                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1991                                  NUM_BANKS(ADDR_SURF_8_BANK));
1992                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1993                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1994                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1995                                  NUM_BANKS(ADDR_SURF_8_BANK));
1996                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1997                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1998                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1999                                  NUM_BANKS(ADDR_SURF_8_BANK));
2000                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2001                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2002                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2003                                  NUM_BANKS(ADDR_SURF_8_BANK));
2004                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2005                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2006                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2007                                  NUM_BANKS(ADDR_SURF_4_BANK));
2008
2009                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2010                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2011
2012                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2013                         if (reg_offset != 7)
2014                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2015
2016                 break;
2017         case CHIP_TONGA:
2018                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2019                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2020                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2021                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2022                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2023                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2024                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2025                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2026                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2027                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2028                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2029                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2030                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2031                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2032                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2033                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2034                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2035                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2036                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2037                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2038                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2039                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2040                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2041                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2042                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2043                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2044                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2045                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2046                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2047                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2048                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2049                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2050                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2051                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2052                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2053                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2054                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2055                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2056                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2057                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2058                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2059                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2060                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2061                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2062                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2063                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2064                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2065                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2066                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2067                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2068                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2069                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2070                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2071                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2072                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2073                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2074                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2075                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2076                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2077                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2078                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2079                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2081                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2082                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2083                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2084                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2085                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2086                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2087                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2088                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2089                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2090                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2091                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2092                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2093                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2094                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2095                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2096                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2097                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2098                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2099                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2100                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2101                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2102                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2103                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2104                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2105                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2106                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2107                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2108                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2109                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2110                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2111                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2112                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2113                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2114                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2115                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2116                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2117                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2118                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2119                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2120                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2121                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2122                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2123                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2124                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2125                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2126                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2127                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2128                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2129                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2130                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2131                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2132                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2133                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2134                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2135                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2136                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2137                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2138                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2139                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2140
2141                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2142                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2143                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2144                                 NUM_BANKS(ADDR_SURF_16_BANK));
2145                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2146                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2147                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2148                                 NUM_BANKS(ADDR_SURF_16_BANK));
2149                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2150                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2151                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2152                                 NUM_BANKS(ADDR_SURF_16_BANK));
2153                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2154                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2155                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2156                                 NUM_BANKS(ADDR_SURF_16_BANK));
2157                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2158                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2159                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2160                                 NUM_BANKS(ADDR_SURF_16_BANK));
2161                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2163                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2164                                 NUM_BANKS(ADDR_SURF_16_BANK));
2165                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2166                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2167                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2168                                 NUM_BANKS(ADDR_SURF_16_BANK));
2169                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2170                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2171                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2172                                 NUM_BANKS(ADDR_SURF_16_BANK));
2173                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2175                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2176                                 NUM_BANKS(ADDR_SURF_16_BANK));
2177                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2178                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2179                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2180                                  NUM_BANKS(ADDR_SURF_16_BANK));
2181                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2182                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2183                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2184                                  NUM_BANKS(ADDR_SURF_16_BANK));
2185                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2188                                  NUM_BANKS(ADDR_SURF_8_BANK));
2189                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2190                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2191                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2192                                  NUM_BANKS(ADDR_SURF_4_BANK));
2193                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2194                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2195                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2196                                  NUM_BANKS(ADDR_SURF_4_BANK));
2197
2198                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2199                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2200
2201                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2202                         if (reg_offset != 7)
2203                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2204
2205                 break;
2206         case CHIP_STONEY:
2207                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2208                                 PIPE_CONFIG(ADDR_SURF_P2) |
2209                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2210                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2211                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2212                                 PIPE_CONFIG(ADDR_SURF_P2) |
2213                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2214                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2215                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2216                                 PIPE_CONFIG(ADDR_SURF_P2) |
2217                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2218                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2219                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2220                                 PIPE_CONFIG(ADDR_SURF_P2) |
2221                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2222                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2223                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224                                 PIPE_CONFIG(ADDR_SURF_P2) |
2225                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2226                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2227                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2228                                 PIPE_CONFIG(ADDR_SURF_P2) |
2229                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2230                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2231                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2232                                 PIPE_CONFIG(ADDR_SURF_P2) |
2233                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2234                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2235                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2236                                 PIPE_CONFIG(ADDR_SURF_P2));
2237                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2238                                 PIPE_CONFIG(ADDR_SURF_P2) |
2239                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2240                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2241                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2242                                  PIPE_CONFIG(ADDR_SURF_P2) |
2243                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2244                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2245                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2246                                  PIPE_CONFIG(ADDR_SURF_P2) |
2247                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2248                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2249                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2250                                  PIPE_CONFIG(ADDR_SURF_P2) |
2251                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2252                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2253                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2254                                  PIPE_CONFIG(ADDR_SURF_P2) |
2255                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2256                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2257                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2258                                  PIPE_CONFIG(ADDR_SURF_P2) |
2259                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2260                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2261                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2262                                  PIPE_CONFIG(ADDR_SURF_P2) |
2263                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2264                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2265                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2266                                  PIPE_CONFIG(ADDR_SURF_P2) |
2267                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2268                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2269                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2270                                  PIPE_CONFIG(ADDR_SURF_P2) |
2271                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2272                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2273                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2274                                  PIPE_CONFIG(ADDR_SURF_P2) |
2275                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2276                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2277                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2278                                  PIPE_CONFIG(ADDR_SURF_P2) |
2279                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2280                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2281                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2282                                  PIPE_CONFIG(ADDR_SURF_P2) |
2283                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2284                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2285                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2286                                  PIPE_CONFIG(ADDR_SURF_P2) |
2287                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2288                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2289                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2290                                  PIPE_CONFIG(ADDR_SURF_P2) |
2291                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2292                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2293                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2294                                  PIPE_CONFIG(ADDR_SURF_P2) |
2295                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2296                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2297                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2298                                  PIPE_CONFIG(ADDR_SURF_P2) |
2299                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2300                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2301                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2302                                  PIPE_CONFIG(ADDR_SURF_P2) |
2303                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2304                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2305                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2306                                  PIPE_CONFIG(ADDR_SURF_P2) |
2307                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2308                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2309
2310                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2311                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2312                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2313                                 NUM_BANKS(ADDR_SURF_8_BANK));
2314                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2315                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2316                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2317                                 NUM_BANKS(ADDR_SURF_8_BANK));
2318                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2319                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2320                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2321                                 NUM_BANKS(ADDR_SURF_8_BANK));
2322                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2323                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2324                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2325                                 NUM_BANKS(ADDR_SURF_8_BANK));
2326                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2327                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2328                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2329                                 NUM_BANKS(ADDR_SURF_8_BANK));
2330                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2331                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2332                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2333                                 NUM_BANKS(ADDR_SURF_8_BANK));
2334                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2336                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2337                                 NUM_BANKS(ADDR_SURF_8_BANK));
2338                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2339                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2340                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2341                                 NUM_BANKS(ADDR_SURF_16_BANK));
2342                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2343                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2344                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2345                                 NUM_BANKS(ADDR_SURF_16_BANK));
2346                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2347                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2348                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2349                                  NUM_BANKS(ADDR_SURF_16_BANK));
2350                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2351                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2352                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2353                                  NUM_BANKS(ADDR_SURF_16_BANK));
2354                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2356                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2357                                  NUM_BANKS(ADDR_SURF_16_BANK));
2358                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2359                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2360                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2361                                  NUM_BANKS(ADDR_SURF_16_BANK));
2362                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2363                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2364                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2365                                  NUM_BANKS(ADDR_SURF_8_BANK));
2366
2367                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2368                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2369                             reg_offset != 23)
2370                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2371
2372                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2373                         if (reg_offset != 7)
2374                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2375
2376                 break;
2377         default:
2378                 dev_warn(adev->dev,
2379                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2380                          adev->asic_type);
2381
2382         case CHIP_CARRIZO:
2383                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2384                                 PIPE_CONFIG(ADDR_SURF_P2) |
2385                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2386                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2387                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2388                                 PIPE_CONFIG(ADDR_SURF_P2) |
2389                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2390                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2391                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2392                                 PIPE_CONFIG(ADDR_SURF_P2) |
2393                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2394                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2395                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2396                                 PIPE_CONFIG(ADDR_SURF_P2) |
2397                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2398                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2399                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400                                 PIPE_CONFIG(ADDR_SURF_P2) |
2401                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2402                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2403                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404                                 PIPE_CONFIG(ADDR_SURF_P2) |
2405                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2406                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2408                                 PIPE_CONFIG(ADDR_SURF_P2) |
2409                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2410                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2411                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2412                                 PIPE_CONFIG(ADDR_SURF_P2));
2413                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414                                 PIPE_CONFIG(ADDR_SURF_P2) |
2415                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2416                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418                                  PIPE_CONFIG(ADDR_SURF_P2) |
2419                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2420                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422                                  PIPE_CONFIG(ADDR_SURF_P2) |
2423                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2424                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2425                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426                                  PIPE_CONFIG(ADDR_SURF_P2) |
2427                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2428                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430                                  PIPE_CONFIG(ADDR_SURF_P2) |
2431                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2432                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2434                                  PIPE_CONFIG(ADDR_SURF_P2) |
2435                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2436                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2438                                  PIPE_CONFIG(ADDR_SURF_P2) |
2439                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2440                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2442                                  PIPE_CONFIG(ADDR_SURF_P2) |
2443                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2444                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2445                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2446                                  PIPE_CONFIG(ADDR_SURF_P2) |
2447                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2448                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2449                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2450                                  PIPE_CONFIG(ADDR_SURF_P2) |
2451                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2452                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2453                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2454                                  PIPE_CONFIG(ADDR_SURF_P2) |
2455                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2456                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2457                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2458                                  PIPE_CONFIG(ADDR_SURF_P2) |
2459                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2460                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2461                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2462                                  PIPE_CONFIG(ADDR_SURF_P2) |
2463                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2464                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2465                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2466                                  PIPE_CONFIG(ADDR_SURF_P2) |
2467                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2468                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2469                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2470                                  PIPE_CONFIG(ADDR_SURF_P2) |
2471                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2472                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2473                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2474                                  PIPE_CONFIG(ADDR_SURF_P2) |
2475                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2476                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2477                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2478                                  PIPE_CONFIG(ADDR_SURF_P2) |
2479                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2480                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2481                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2482                                  PIPE_CONFIG(ADDR_SURF_P2) |
2483                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2484                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2485
2486                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2488                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2489                                 NUM_BANKS(ADDR_SURF_8_BANK));
2490                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2492                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2493                                 NUM_BANKS(ADDR_SURF_8_BANK));
2494                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2497                                 NUM_BANKS(ADDR_SURF_8_BANK));
2498                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2501                                 NUM_BANKS(ADDR_SURF_8_BANK));
2502                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2504                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2505                                 NUM_BANKS(ADDR_SURF_8_BANK));
2506                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2508                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2509                                 NUM_BANKS(ADDR_SURF_8_BANK));
2510                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2513                                 NUM_BANKS(ADDR_SURF_8_BANK));
2514                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2515                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2516                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2517                                 NUM_BANKS(ADDR_SURF_16_BANK));
2518                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2519                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2520                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2521                                 NUM_BANKS(ADDR_SURF_16_BANK));
2522                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2523                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2524                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2525                                  NUM_BANKS(ADDR_SURF_16_BANK));
2526                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2527                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2529                                  NUM_BANKS(ADDR_SURF_16_BANK));
2530                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2532                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2533                                  NUM_BANKS(ADDR_SURF_16_BANK));
2534                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2536                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2537                                  NUM_BANKS(ADDR_SURF_16_BANK));
2538                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2541                                  NUM_BANKS(ADDR_SURF_8_BANK));
2542
2543                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2544                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2545                             reg_offset != 23)
2546                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2547
2548                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2549                         if (reg_offset != 7)
2550                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2551
2552                 break;
2553         }
2554 }
2555
/*
 * gfx_v8_0_create_bitmask - build a mask with the low @bit_width bits set
 *
 * @bit_width: number of low-order bits to set
 *
 * Returns a u32 with bits [bit_width - 1:0] set.  A u32 cannot hold more
 * than 32 bits, so any width of 32 or more yields 0xffffffff.  Handling
 * that case explicitly keeps the shift count below the operand width; a
 * shift by the full width or more is undefined behaviour in C.
 */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
2560
2561 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2562 {
2563         u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2564
2565         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2566                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2567                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2568         } else if (se_num == 0xffffffff) {
2569                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2570                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2571         } else if (sh_num == 0xffffffff) {
2572                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2573                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2574         } else {
2575                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2576                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2577         }
2578         WREG32(mmGRBM_GFX_INDEX, data);
2579 }
2580
2581 static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
2582                                     u32 max_rb_num_per_se,
2583                                     u32 sh_per_se)
2584 {
2585         u32 data, mask;
2586
2587         data = RREG32(mmCC_RB_BACKEND_DISABLE);
2588         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2589
2590         data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
2591
2592         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2593
2594         mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);
2595
2596         return data & mask;
2597 }
2598
2599 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
2600                               u32 se_num, u32 sh_per_se,
2601                               u32 max_rb_num_per_se)
2602 {
2603         int i, j;
2604         u32 data, mask;
2605         u32 disabled_rbs = 0;
2606         u32 enabled_rbs = 0;
2607
2608         mutex_lock(&adev->grbm_idx_mutex);
2609         for (i = 0; i < se_num; i++) {
2610                 for (j = 0; j < sh_per_se; j++) {
2611                         gfx_v8_0_select_se_sh(adev, i, j);
2612                         data = gfx_v8_0_get_rb_disabled(adev,
2613                                               max_rb_num_per_se, sh_per_se);
2614                         disabled_rbs |= data << ((i * sh_per_se + j) *
2615                                                  RB_BITMAP_WIDTH_PER_SH);
2616                 }
2617         }
2618         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2619         mutex_unlock(&adev->grbm_idx_mutex);
2620
2621         mask = 1;
2622         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
2623                 if (!(disabled_rbs & mask))
2624                         enabled_rbs |= mask;
2625                 mask <<= 1;
2626         }
2627
2628         adev->gfx.config.backend_enable_mask = enabled_rbs;
2629
2630         mutex_lock(&adev->grbm_idx_mutex);
2631         for (i = 0; i < se_num; i++) {
2632                 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
2633                 data = RREG32(mmPA_SC_RASTER_CONFIG);
2634                 for (j = 0; j < sh_per_se; j++) {
2635                         switch (enabled_rbs & 3) {
2636                         case 0:
2637                                 if (j == 0)
2638                                         data |= (RASTER_CONFIG_RB_MAP_3 <<
2639                                                  PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
2640                                 else
2641                                         data |= (RASTER_CONFIG_RB_MAP_0 <<
2642                                                  PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
2643                                 break;
2644                         case 1:
2645                                 data |= (RASTER_CONFIG_RB_MAP_0 <<
2646                                          (i * sh_per_se + j) * 2);
2647                                 break;
2648                         case 2:
2649                                 data |= (RASTER_CONFIG_RB_MAP_3 <<
2650                                          (i * sh_per_se + j) * 2);
2651                                 break;
2652                         case 3:
2653                         default:
2654                                 data |= (RASTER_CONFIG_RB_MAP_2 <<
2655                                          (i * sh_per_se + j) * 2);
2656                                 break;
2657                         }
2658                         enabled_rbs >>= 2;
2659                 }
2660                 WREG32(mmPA_SC_RASTER_CONFIG, data);
2661         }
2662         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2663         mutex_unlock(&adev->grbm_idx_mutex);
2664 }
2665
2666 /**
2667  * gfx_v8_0_init_compute_vmid - gart enable
2668  *
2669  * @rdev: amdgpu_device pointer
2670  *
2671  * Initialize compute vmid sh_mem registers
2672  *
2673  */
2674 #define DEFAULT_SH_MEM_BASES    (0x6000)
2675 #define FIRST_COMPUTE_VMID      (8)
2676 #define LAST_COMPUTE_VMID       (16)
2677 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
2678 {
2679         int i;
2680         uint32_t sh_mem_config;
2681         uint32_t sh_mem_bases;
2682
2683         /*
2684          * Configure apertures:
2685          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2686          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2687          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2688          */
2689         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2690
2691         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
2692                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
2693                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2694                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
2695                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
2696                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
2697
2698         mutex_lock(&adev->srbm_mutex);
2699         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2700                 vi_srbm_select(adev, 0, 0, 0, i);
2701                 /* CP and shaders */
2702                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
2703                 WREG32(mmSH_MEM_APE1_BASE, 1);
2704                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2705                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
2706         }
2707         vi_srbm_select(adev, 0, 0, 0, 0);
2708         mutex_unlock(&adev->srbm_mutex);
2709 }
2710
2711 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
2712 {
2713         u32 tmp;
2714         int i;
2715
2716         tmp = RREG32(mmGRBM_CNTL);
2717         tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
2718         WREG32(mmGRBM_CNTL, tmp);
2719
2720         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2721         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2722         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
2723         WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
2724                adev->gfx.config.gb_addr_config & 0x70);
2725         WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
2726                adev->gfx.config.gb_addr_config & 0x70);
2727         WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2728         WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2729         WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2730
2731         gfx_v8_0_tiling_mode_table_init(adev);
2732
2733         gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
2734                                  adev->gfx.config.max_sh_per_se,
2735                                  adev->gfx.config.max_backends_per_se);
2736
2737         /* XXX SH_MEM regs */
2738         /* where to put LDS, scratch, GPUVM in FSA64 space */
2739         mutex_lock(&adev->srbm_mutex);
2740         for (i = 0; i < 16; i++) {
2741                 vi_srbm_select(adev, 0, 0, 0, i);
2742                 /* CP and shaders */
2743                 if (i == 0) {
2744                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
2745                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
2746                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2747                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2748                         WREG32(mmSH_MEM_CONFIG, tmp);
2749                 } else {
2750                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
2751                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
2752                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2753                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2754                         WREG32(mmSH_MEM_CONFIG, tmp);
2755                 }
2756
2757                 WREG32(mmSH_MEM_APE1_BASE, 1);
2758                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2759                 WREG32(mmSH_MEM_BASES, 0);
2760         }
2761         vi_srbm_select(adev, 0, 0, 0, 0);
2762         mutex_unlock(&adev->srbm_mutex);
2763
2764         gfx_v8_0_init_compute_vmid(adev);
2765
2766         mutex_lock(&adev->grbm_idx_mutex);
2767         /*
2768          * making sure that the following register writes will be broadcasted
2769          * to all the shaders
2770          */
2771         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2772
2773         WREG32(mmPA_SC_FIFO_SIZE,
2774                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
2775                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2776                    (adev->gfx.config.sc_prim_fifo_size_backend <<
2777                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2778                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
2779                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2780                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
2781                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
2782         mutex_unlock(&adev->grbm_idx_mutex);
2783
2784 }
2785
2786 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2787 {
2788         u32 i, j, k;
2789         u32 mask;
2790
2791         mutex_lock(&adev->grbm_idx_mutex);
2792         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2793                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2794                         gfx_v8_0_select_se_sh(adev, i, j);
2795                         for (k = 0; k < adev->usec_timeout; k++) {
2796                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2797                                         break;
2798                                 udelay(1);
2799                         }
2800                 }
2801         }
2802         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2803         mutex_unlock(&adev->grbm_idx_mutex);
2804
2805         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2806                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2807                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2808                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2809         for (k = 0; k < adev->usec_timeout; k++) {
2810                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2811                         break;
2812                 udelay(1);
2813         }
2814 }
2815
2816 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2817                                                bool enable)
2818 {
2819         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2820
2821         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2822         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2823         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2824         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2825
2826         WREG32(mmCP_INT_CNTL_RING0, tmp);
2827 }
2828
2829 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
2830 {
2831         u32 tmp = RREG32(mmRLC_CNTL);
2832
2833         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2834         WREG32(mmRLC_CNTL, tmp);
2835
2836         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
2837
2838         gfx_v8_0_wait_for_rlc_serdes(adev);
2839 }
2840
2841 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
2842 {
2843         u32 tmp = RREG32(mmGRBM_SOFT_RESET);
2844
2845         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2846         WREG32(mmGRBM_SOFT_RESET, tmp);
2847         udelay(50);
2848         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2849         WREG32(mmGRBM_SOFT_RESET, tmp);
2850         udelay(50);
2851 }
2852
2853 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
2854 {
2855         u32 tmp = RREG32(mmRLC_CNTL);
2856
2857         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
2858         WREG32(mmRLC_CNTL, tmp);
2859
2860         /* carrizo do enable cp interrupt after cp inited */
2861         if (!(adev->flags & AMD_IS_APU))
2862                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
2863
2864         udelay(50);
2865 }
2866
2867 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
2868 {
2869         const struct rlc_firmware_header_v2_0 *hdr;
2870         const __le32 *fw_data;
2871         unsigned i, fw_size;
2872
2873         if (!adev->gfx.rlc_fw)
2874                 return -EINVAL;
2875
2876         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2877         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2878
2879         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2880                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2881         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2882
2883         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
2884         for (i = 0; i < fw_size; i++)
2885                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2886         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2887
2888         return 0;
2889 }
2890
2891 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
2892 {
2893         int r;
2894
2895         gfx_v8_0_rlc_stop(adev);
2896
2897         /* disable CG */
2898         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
2899
2900         /* disable PG */
2901         WREG32(mmRLC_PG_CNTL, 0);
2902
2903         gfx_v8_0_rlc_reset(adev);
2904
2905         if (!adev->pp_enabled) {
2906                 if (!adev->firmware.smu_load) {
2907                         /* legacy rlc firmware loading */
2908                         r = gfx_v8_0_rlc_load_microcode(adev);
2909                         if (r)
2910                                 return r;
2911                 } else {
2912                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
2913                                                         AMDGPU_UCODE_ID_RLC_G);
2914                         if (r)
2915                                 return -EINVAL;
2916                 }
2917         }
2918
2919         gfx_v8_0_rlc_start(adev);
2920
2921         return 0;
2922 }
2923
2924 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2925 {
2926         int i;
2927         u32 tmp = RREG32(mmCP_ME_CNTL);
2928
2929         if (enable) {
2930                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2931                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2932                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2933         } else {
2934                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2935                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2936                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2937                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2938                         adev->gfx.gfx_ring[i].ready = false;
2939         }
2940         WREG32(mmCP_ME_CNTL, tmp);
2941         udelay(50);
2942 }
2943
2944 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2945 {
2946         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2947         const struct gfx_firmware_header_v1_0 *ce_hdr;
2948         const struct gfx_firmware_header_v1_0 *me_hdr;
2949         const __le32 *fw_data;
2950         unsigned i, fw_size;
2951
2952         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2953                 return -EINVAL;
2954
2955         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2956                 adev->gfx.pfp_fw->data;
2957         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2958                 adev->gfx.ce_fw->data;
2959         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2960                 adev->gfx.me_fw->data;
2961
2962         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2963         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2964         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2965
2966         gfx_v8_0_cp_gfx_enable(adev, false);
2967
2968         /* PFP */
2969         fw_data = (const __le32 *)
2970                 (adev->gfx.pfp_fw->data +
2971                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2972         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2973         WREG32(mmCP_PFP_UCODE_ADDR, 0);
2974         for (i = 0; i < fw_size; i++)
2975                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2976         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2977
2978         /* CE */
2979         fw_data = (const __le32 *)
2980                 (adev->gfx.ce_fw->data +
2981                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2982         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2983         WREG32(mmCP_CE_UCODE_ADDR, 0);
2984         for (i = 0; i < fw_size; i++)
2985                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2986         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2987
2988         /* ME */
2989         fw_data = (const __le32 *)
2990                 (adev->gfx.me_fw->data +
2991                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2992         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2993         WREG32(mmCP_ME_RAM_WADDR, 0);
2994         for (i = 0; i < fw_size; i++)
2995                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2996         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2997
2998         return 0;
2999 }
3000
3001 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3002 {
3003         u32 count = 0;
3004         const struct cs_section_def *sect = NULL;
3005         const struct cs_extent_def *ext = NULL;
3006
3007         /* begin clear state */
3008         count += 2;
3009         /* context control state */
3010         count += 3;
3011
3012         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3013                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3014                         if (sect->id == SECT_CONTEXT)
3015                                 count += 2 + ext->reg_count;
3016                         else
3017                                 return 0;
3018                 }
3019         }
3020         /* pa_sc_raster_config/pa_sc_raster_config1 */
3021         count += 4;
3022         /* end clear state */
3023         count += 2;
3024         /* clear state */
3025         count += 2;
3026
3027         return count;
3028 }
3029
3030 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3031 {
3032         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3033         const struct cs_section_def *sect = NULL;
3034         const struct cs_extent_def *ext = NULL;
3035         int r, i;
3036
3037         /* init the CP */
3038         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3039         WREG32(mmCP_ENDIAN_SWAP, 0);
3040         WREG32(mmCP_DEVICE_ID, 1);
3041
3042         gfx_v8_0_cp_gfx_enable(adev, true);
3043
3044         r = amdgpu_ring_lock(ring, gfx_v8_0_get_csb_size(adev) + 4);
3045         if (r) {
3046                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3047                 return r;
3048         }
3049
3050         /* clear state buffer */
3051         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3052         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3053
3054         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3055         amdgpu_ring_write(ring, 0x80000000);
3056         amdgpu_ring_write(ring, 0x80000000);
3057
3058         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3059                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3060                         if (sect->id == SECT_CONTEXT) {
3061                                 amdgpu_ring_write(ring,
3062                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3063                                                ext->reg_count));
3064                                 amdgpu_ring_write(ring,
3065                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3066                                 for (i = 0; i < ext->reg_count; i++)
3067                                         amdgpu_ring_write(ring, ext->extent[i]);
3068                         }
3069                 }
3070         }
3071
3072         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3073         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3074         switch (adev->asic_type) {
3075         case CHIP_TONGA:
3076                 amdgpu_ring_write(ring, 0x16000012);
3077                 amdgpu_ring_write(ring, 0x0000002A);
3078                 break;
3079         case CHIP_FIJI:
3080                 amdgpu_ring_write(ring, 0x3a00161a);
3081                 amdgpu_ring_write(ring, 0x0000002e);
3082                 break;
3083         case CHIP_TOPAZ:
3084         case CHIP_CARRIZO:
3085                 amdgpu_ring_write(ring, 0x00000002);
3086                 amdgpu_ring_write(ring, 0x00000000);
3087                 break;
3088         case CHIP_STONEY:
3089                 amdgpu_ring_write(ring, 0x00000000);
3090                 amdgpu_ring_write(ring, 0x00000000);
3091                 break;
3092         default:
3093                 BUG();
3094         }
3095
3096         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3097         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3098
3099         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3100         amdgpu_ring_write(ring, 0);
3101
3102         /* init the CE partitions */
3103         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3104         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3105         amdgpu_ring_write(ring, 0x8000);
3106         amdgpu_ring_write(ring, 0x8000);
3107
3108         amdgpu_ring_unlock_commit(ring);
3109
3110         return 0;
3111 }
3112
3113 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
3114 {
3115         struct amdgpu_ring *ring;
3116         u32 tmp;
3117         u32 rb_bufsz;
3118         u64 rb_addr, rptr_addr;
3119         int r;
3120
3121         /* Set the write pointer delay */
3122         WREG32(mmCP_RB_WPTR_DELAY, 0);
3123
3124         /* set the RB to use vmid 0 */
3125         WREG32(mmCP_RB_VMID, 0);
3126
3127         /* Set ring buffer size */
3128         ring = &adev->gfx.gfx_ring[0];
3129         rb_bufsz = order_base_2(ring->ring_size / 8);
3130         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3131         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3132         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
3133         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
3134 #ifdef __BIG_ENDIAN
3135         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3136 #endif
3137         WREG32(mmCP_RB0_CNTL, tmp);
3138
3139         /* Initialize the ring buffer's read and write pointers */
3140         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
3141         ring->wptr = 0;
3142         WREG32(mmCP_RB0_WPTR, ring->wptr);
3143
3144         /* set the wb address wether it's enabled or not */
3145         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3146         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3147         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
3148
3149         mdelay(1);
3150         WREG32(mmCP_RB0_CNTL, tmp);
3151
3152         rb_addr = ring->gpu_addr >> 8;
3153         WREG32(mmCP_RB0_BASE, rb_addr);
3154         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3155
3156         /* no gfx doorbells on iceland */
3157         if (adev->asic_type != CHIP_TOPAZ) {
3158                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
3159                 if (ring->use_doorbell) {
3160                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3161                                             DOORBELL_OFFSET, ring->doorbell_index);
3162                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3163                                             DOORBELL_EN, 1);
3164                 } else {
3165                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3166                                             DOORBELL_EN, 0);
3167                 }
3168                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
3169
3170                 if (adev->asic_type == CHIP_TONGA) {
3171                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3172                                             DOORBELL_RANGE_LOWER,
3173                                             AMDGPU_DOORBELL_GFX_RING0);
3174                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3175
3176                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
3177                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3178                 }
3179
3180         }
3181
3182         /* start the ring */
3183         gfx_v8_0_cp_gfx_start(adev);
3184         ring->ready = true;
3185         r = amdgpu_ring_test_ring(ring);
3186         if (r) {
3187                 ring->ready = false;
3188                 return r;
3189         }
3190
3191         return 0;
3192 }
3193
3194 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3195 {
3196         int i;
3197
3198         if (enable) {
3199                 WREG32(mmCP_MEC_CNTL, 0);
3200         } else {
3201                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3202                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3203                         adev->gfx.compute_ring[i].ready = false;
3204         }
3205         udelay(50);
3206 }
3207
3208 static int gfx_v8_0_cp_compute_start(struct amdgpu_device *adev)
3209 {
3210         gfx_v8_0_cp_compute_enable(adev, true);
3211
3212         return 0;
3213 }
3214
3215 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3216 {
3217         const struct gfx_firmware_header_v1_0 *mec_hdr;
3218         const __le32 *fw_data;
3219         unsigned i, fw_size;
3220
3221         if (!adev->gfx.mec_fw)
3222                 return -EINVAL;
3223
3224         gfx_v8_0_cp_compute_enable(adev, false);
3225
3226         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3227         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3228
3229         fw_data = (const __le32 *)
3230                 (adev->gfx.mec_fw->data +
3231                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3232         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3233
3234         /* MEC1 */
3235         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3236         for (i = 0; i < fw_size; i++)
3237                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3238         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3239
3240         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3241         if (adev->gfx.mec2_fw) {
3242                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
3243
3244                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3245                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3246
3247                 fw_data = (const __le32 *)
3248                         (adev->gfx.mec2_fw->data +
3249                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3250                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3251
3252                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3253                 for (i = 0; i < fw_size; i++)
3254                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3255                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
3256         }
3257
3258         return 0;
3259 }
3260
/*
 * struct vi_mqd - per-queue descriptor for VI compute rings.
 *
 * The compute resume path in this file allocates one buffer object of
 * sizeof(struct vi_mqd) per compute ring and calls it the "MQD".
 *
 * Layout: 256 named dwords (the trailing comments give each field's dword
 * ordinal, 0..255) followed by a 256 dword area reserved for the ucode.
 * Every member is a uint32_t, so a field's byte offset is its ordinal * 4.
 * Field order must not be changed.
 *
 * NOTE(review): the compute_*, cp_hqd_* and cp_mqd_* names look like they
 * shadow the hardware registers of the same name - confirm against the
 * register headers before relying on it.
 */
3261 struct vi_mqd {
3262         uint32_t header;  /* ordinal0 */
/* ordinals 1-38: compute_* dispatch/shader state */
3263         uint32_t compute_dispatch_initiator;  /* ordinal1 */
3264         uint32_t compute_dim_x;  /* ordinal2 */
3265         uint32_t compute_dim_y;  /* ordinal3 */
3266         uint32_t compute_dim_z;  /* ordinal4 */
3267         uint32_t compute_start_x;  /* ordinal5 */
3268         uint32_t compute_start_y;  /* ordinal6 */
3269         uint32_t compute_start_z;  /* ordinal7 */
3270         uint32_t compute_num_thread_x;  /* ordinal8 */
3271         uint32_t compute_num_thread_y;  /* ordinal9 */
3272         uint32_t compute_num_thread_z;  /* ordinal10 */
3273         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
3274         uint32_t compute_perfcount_enable;  /* ordinal12 */
3275         uint32_t compute_pgm_lo;  /* ordinal13 */
3276         uint32_t compute_pgm_hi;  /* ordinal14 */
3277         uint32_t compute_tba_lo;  /* ordinal15 */
3278         uint32_t compute_tba_hi;  /* ordinal16 */
3279         uint32_t compute_tma_lo;  /* ordinal17 */
3280         uint32_t compute_tma_hi;  /* ordinal18 */
3281         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
3282         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
3283         uint32_t compute_vmid;  /* ordinal21 */
3284         uint32_t compute_resource_limits;  /* ordinal22 */
3285         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
3286         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
3287         uint32_t compute_tmpring_size;  /* ordinal25 */
3288         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
3289         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
3290         uint32_t compute_restart_x;  /* ordinal28 */
3291         uint32_t compute_restart_y;  /* ordinal29 */
3292         uint32_t compute_restart_z;  /* ordinal30 */
3293         uint32_t compute_thread_trace_enable;  /* ordinal31 */
3294         uint32_t compute_misc_reserved;  /* ordinal32 */
3295         uint32_t compute_dispatch_id;  /* ordinal33 */
3296         uint32_t compute_threadgroup_id;  /* ordinal34 */
3297         uint32_t compute_relaunch;  /* ordinal35 */
3298         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
3299         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
3300         uint32_t compute_wave_restore_control;  /* ordinal38 */
3301         uint32_t reserved9;  /* ordinal39 */
3302         uint32_t reserved10;  /* ordinal40 */
3303         uint32_t reserved11;  /* ordinal41 */
3304         uint32_t reserved12;  /* ordinal42 */
3305         uint32_t reserved13;  /* ordinal43 */
3306         uint32_t reserved14;  /* ordinal44 */
3307         uint32_t reserved15;  /* ordinal45 */
3308         uint32_t reserved16;  /* ordinal46 */
3309         uint32_t reserved17;  /* ordinal47 */
3310         uint32_t reserved18;  /* ordinal48 */
3311         uint32_t reserved19;  /* ordinal49 */
3312         uint32_t reserved20;  /* ordinal50 */
3313         uint32_t reserved21;  /* ordinal51 */
3314         uint32_t reserved22;  /* ordinal52 */
3315         uint32_t reserved23;  /* ordinal53 */
3316         uint32_t reserved24;  /* ordinal54 */
3317         uint32_t reserved25;  /* ordinal55 */
3318         uint32_t reserved26;  /* ordinal56 */
3319         uint32_t reserved27;  /* ordinal57 */
3320         uint32_t reserved28;  /* ordinal58 */
3321         uint32_t reserved29;  /* ordinal59 */
3322         uint32_t reserved30;  /* ordinal60 */
3323         uint32_t reserved31;  /* ordinal61 */
3324         uint32_t reserved32;  /* ordinal62 */
3325         uint32_t reserved33;  /* ordinal63 */
3326         uint32_t reserved34;  /* ordinal64 */
/* ordinals 65-80: sixteen compute_user_data dwords */
3327         uint32_t compute_user_data_0;  /* ordinal65 */
3328         uint32_t compute_user_data_1;  /* ordinal66 */
3329         uint32_t compute_user_data_2;  /* ordinal67 */
3330         uint32_t compute_user_data_3;  /* ordinal68 */
3331         uint32_t compute_user_data_4;  /* ordinal69 */
3332         uint32_t compute_user_data_5;  /* ordinal70 */
3333         uint32_t compute_user_data_6;  /* ordinal71 */
3334         uint32_t compute_user_data_7;  /* ordinal72 */
3335         uint32_t compute_user_data_8;  /* ordinal73 */
3336         uint32_t compute_user_data_9;  /* ordinal74 */
3337         uint32_t compute_user_data_10;  /* ordinal75 */
3338         uint32_t compute_user_data_11;  /* ordinal76 */
3339         uint32_t compute_user_data_12;  /* ordinal77 */
3340         uint32_t compute_user_data_13;  /* ordinal78 */
3341         uint32_t compute_user_data_14;  /* ordinal79 */
3342         uint32_t compute_user_data_15;  /* ordinal80 */
3343         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
3344         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
3345         uint32_t reserved35;  /* ordinal83 */
3346         uint32_t reserved36;  /* ordinal84 */
3347         uint32_t reserved37;  /* ordinal85 */
/* ordinals 86-105: cp_mqd_* time stamps and connect-end counters, as lo/hi dword pairs where named so */
3348         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
3349         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
3350         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
3351         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
3352         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
3353         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
3354         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
3355         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
3356         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
3357         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
3358         uint32_t reserved38;  /* ordinal96 */
3359         uint32_t reserved39;  /* ordinal97 */
3360         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
3361         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
3362         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
3363         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
3364         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
3365         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
3366         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
3367         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
3368         uint32_t reserved40;  /* ordinal106 */
3369         uint32_t reserved41;  /* ordinal107 */
3370         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
3371         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
3372         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
3373         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
3374         uint32_t reserved42;  /* ordinal112 */
3375         uint32_t reserved43;  /* ordinal113 */
3376         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
3377         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
3378         uint32_t cp_packet_id_lo;  /* ordinal116 */
3379         uint32_t cp_packet_id_hi;  /* ordinal117 */
3380         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
3381         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
3382         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
3383         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
3384         uint32_t gds_save_mask_lo;  /* ordinal122 */
3385         uint32_t gds_save_mask_hi;  /* ordinal123 */
3386         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
3387         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
3388         uint32_t reserved44;  /* ordinal126 */
3389         uint32_t reserved45;  /* ordinal127 */
/* ordinals 128-181: cp_mqd_* / cp_hqd_* queue state */
3390         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
3391         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
3392         uint32_t cp_hqd_active;  /* ordinal130 */
3393         uint32_t cp_hqd_vmid;  /* ordinal131 */
3394         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
3395         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
3396         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
3397         uint32_t cp_hqd_quantum;  /* ordinal135 */
3398         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
3399         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
3400         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
3401         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
3402         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
3403         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
3404         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
3405         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
3406         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
3407         uint32_t cp_hqd_pq_control;  /* ordinal145 */
3408         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
3409         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
3410         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
3411         uint32_t cp_hqd_ib_control;  /* ordinal149 */
3412         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
3413         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
3414         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
3415         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
3416         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
3417         uint32_t cp_hqd_msg_type;  /* ordinal155 */
3418         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
3419         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
3420         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
3421         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
3422         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
3423         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
3424         uint32_t cp_mqd_control;  /* ordinal162 */
3425         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
3426         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
3427         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
3428         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
3429         uint32_t cp_hqd_eop_control;  /* ordinal167 */
3430         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
3431         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
3432         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
3433         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
3434         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
3435         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
3436         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
3437         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
3438         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
3439         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
3440         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
3441         uint32_t cp_hqd_error;  /* ordinal179 */
3442         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
3443         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
3444         uint32_t reserved46;  /* ordinal182 */
3445         uint32_t reserved47;  /* ordinal183 */
3446         uint32_t reserved48;  /* ordinal184 */
3447         uint32_t reserved49;  /* ordinal185 */
3448         uint32_t reserved50;  /* ordinal186 */
3449         uint32_t reserved51;  /* ordinal187 */
3450         uint32_t reserved52;  /* ordinal188 */
3451         uint32_t reserved53;  /* ordinal189 */
3452         uint32_t reserved54;  /* ordinal190 */
3453         uint32_t reserved55;  /* ordinal191 */
/* ordinals 192-224: iqtimer packet, one header dword plus 32 payload dwords */
3454         uint32_t iqtimer_pkt_header;  /* ordinal192 */
3455         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
3456         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
3457         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
3458         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
3459         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
3460         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
3461         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
3462         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
3463         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
3464         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
3465         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
3466         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
3467         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
3468         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
3469         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
3470         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
3471         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
3472         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
3473         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
3474         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
3475         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
3476         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
3477         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
3478         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
3479         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
3480         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
3481         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
3482         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
3483         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
3484         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
3485         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
3486         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
3487         uint32_t reserved56;  /* ordinal225 */
3488         uint32_t reserved57;  /* ordinal226 */
3489         uint32_t reserved58;  /* ordinal227 */
/* ordinals 228-235: set_resources packet, one header dword plus dw1..dw7 */
3490         uint32_t set_resources_header;  /* ordinal228 */
3491         uint32_t set_resources_dw1;  /* ordinal229 */
3492         uint32_t set_resources_dw2;  /* ordinal230 */
3493         uint32_t set_resources_dw3;  /* ordinal231 */
3494         uint32_t set_resources_dw4;  /* ordinal232 */
3495         uint32_t set_resources_dw5;  /* ordinal233 */
3496         uint32_t set_resources_dw6;  /* ordinal234 */
3497         uint32_t set_resources_dw7;  /* ordinal235 */
3498         uint32_t reserved59;  /* ordinal236 */
3499         uint32_t reserved60;  /* ordinal237 */
3500         uint32_t reserved61;  /* ordinal238 */
3501         uint32_t reserved62;  /* ordinal239 */
3502         uint32_t reserved63;  /* ordinal240 */
3503         uint32_t reserved64;  /* ordinal241 */
3504         uint32_t reserved65;  /* ordinal242 */
3505         uint32_t reserved66;  /* ordinal243 */
3506         uint32_t reserved67;  /* ordinal244 */
3507         uint32_t reserved68;  /* ordinal245 */
3508         uint32_t reserved69;  /* ordinal246 */
3509         uint32_t reserved70;  /* ordinal247 */
3510         uint32_t reserved71;  /* ordinal248 */
3511         uint32_t reserved72;  /* ordinal249 */
3512         uint32_t reserved73;  /* ordinal250 */
3513         uint32_t reserved74;  /* ordinal251 */
3514         uint32_t reserved75;  /* ordinal252 */
3515         uint32_t reserved76;  /* ordinal253 */
3516         uint32_t reserved77;  /* ordinal254 */
3517         uint32_t reserved78;  /* ordinal255 */
3518
/* trailing scratch area; doubles the structure size to 512 dwords */
3519         uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
3520 };
3521
3522 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3523 {
3524         int i, r;
3525
3526         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3527                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3528
3529                 if (ring->mqd_obj) {
3530                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3531                         if (unlikely(r != 0))
3532                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3533
3534                         amdgpu_bo_unpin(ring->mqd_obj);
3535                         amdgpu_bo_unreserve(ring->mqd_obj);
3536
3537                         amdgpu_bo_unref(&ring->mqd_obj);
3538                         ring->mqd_obj = NULL;
3539                 }
3540         }
3541 }
3542
3543 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
3544 {
3545         int r, i, j;
3546         u32 tmp;
3547         bool use_doorbell = true;
3548         u64 hqd_gpu_addr;
3549         u64 mqd_gpu_addr;
3550         u64 eop_gpu_addr;
3551         u64 wb_gpu_addr;
3552         u32 *buf;
3553         struct vi_mqd *mqd;
3554
3555         /* init the pipes */
3556         mutex_lock(&adev->srbm_mutex);
3557         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
3558                 int me = (i < 4) ? 1 : 2;
3559                 int pipe = (i < 4) ? i : (i - 4);
3560
3561                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
3562                 eop_gpu_addr >>= 8;
3563
3564                 vi_srbm_select(adev, me, pipe, 0, 0);
3565
3566                 /* write the EOP addr */
3567                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
3568                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
3569
3570                 /* set the VMID assigned */
3571                 WREG32(mmCP_HQD_VMID, 0);
3572
3573                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3574                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
3575                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3576                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
3577                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
3578         }
3579         vi_srbm_select(adev, 0, 0, 0, 0);
3580         mutex_unlock(&adev->srbm_mutex);
3581
3582         /* init the queues.  Just two for now. */
3583         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3584                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3585
3586                 if (ring->mqd_obj == NULL) {
3587                         r = amdgpu_bo_create(adev,
3588                                              sizeof(struct vi_mqd),
3589                                              PAGE_SIZE, true,
3590                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
3591                                              NULL, &ring->mqd_obj);
3592                         if (r) {
3593                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3594                                 return r;
3595                         }
3596                 }
3597
3598                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3599                 if (unlikely(r != 0)) {
3600                         gfx_v8_0_cp_compute_fini(adev);
3601                         return r;
3602                 }
3603                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
3604                                   &mqd_gpu_addr);
3605                 if (r) {
3606                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
3607                         gfx_v8_0_cp_compute_fini(adev);
3608                         return r;
3609                 }
3610                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
3611                 if (r) {
3612                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
3613                         gfx_v8_0_cp_compute_fini(adev);
3614                         return r;
3615                 }
3616
3617                 /* init the mqd struct */
3618                 memset(buf, 0, sizeof(struct vi_mqd));
3619
3620                 mqd = (struct vi_mqd *)buf;
3621                 mqd->header = 0xC0310800;
3622                 mqd->compute_pipelinestat_enable = 0x00000001;
3623                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3624                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3625                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3626                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3627                 mqd->compute_misc_reserved = 0x00000003;
3628
3629                 mutex_lock(&adev->srbm_mutex);
3630                 vi_srbm_select(adev, ring->me,
3631                                ring->pipe,
3632                                ring->queue, 0);
3633
3634                 /* disable wptr polling */
3635                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3636                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3637                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
3638
3639                 mqd->cp_hqd_eop_base_addr_lo =
3640                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
3641                 mqd->cp_hqd_eop_base_addr_hi =
3642                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
3643
3644                 /* enable doorbell? */
3645                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3646                 if (use_doorbell) {
3647                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3648                 } else {
3649                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
3650                 }
3651                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
3652                 mqd->cp_hqd_pq_doorbell_control = tmp;
3653
3654                 /* disable the queue if it's active */
3655                 mqd->cp_hqd_dequeue_request = 0;
3656                 mqd->cp_hqd_pq_rptr = 0;
3657                 mqd->cp_hqd_pq_wptr= 0;
3658                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
3659                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
3660                         for (j = 0; j < adev->usec_timeout; j++) {
3661                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
3662                                         break;
3663                                 udelay(1);
3664                         }
3665                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
3666                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
3667                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3668                 }
3669
3670                 /* set the pointer to the MQD */
3671                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
3672                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3673                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
3674                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3675
3676                 /* set MQD vmid to 0 */
3677                 tmp = RREG32(mmCP_MQD_CONTROL);
3678                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3679                 WREG32(mmCP_MQD_CONTROL, tmp);
3680                 mqd->cp_mqd_control = tmp;
3681
3682                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3683                 hqd_gpu_addr = ring->gpu_addr >> 8;
3684                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3685                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3686                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
3687                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
3688
3689                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3690                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
3691                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3692                                     (order_base_2(ring->ring_size / 4) - 1));
3693                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3694                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3695 #ifdef __BIG_ENDIAN
3696                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3697 #endif
3698                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3699                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3700                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3701                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3702                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
3703                 mqd->cp_hqd_pq_control = tmp;
3704
3705                 /* set the wb address wether it's enabled or not */
3706                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3707                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3708                 mqd->cp_hqd_pq_rptr_report_addr_hi =
3709                         upper_32_bits(wb_gpu_addr) & 0xffff;
3710                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3711                        mqd->cp_hqd_pq_rptr_report_addr_lo);
3712                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3713                        mqd->cp_hqd_pq_rptr_report_addr_hi);
3714
3715                 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3716                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3717                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3718                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3719                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
3720                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3721                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
3722
3723                 /* enable the doorbell if requested */
3724                 if (use_doorbell) {
3725                         if ((adev->asic_type == CHIP_CARRIZO) ||
3726                             (adev->asic_type == CHIP_FIJI) ||
3727                             (adev->asic_type == CHIP_STONEY)) {
3728                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
3729                                        AMDGPU_DOORBELL_KIQ << 2);
3730                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
3731                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
3732                         }
3733                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3734                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3735                                             DOORBELL_OFFSET, ring->doorbell_index);
3736                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3737                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
3738                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
3739                         mqd->cp_hqd_pq_doorbell_control = tmp;
3740
3741                 } else {
3742                         mqd->cp_hqd_pq_doorbell_control = 0;
3743                 }
3744                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
3745                        mqd->cp_hqd_pq_doorbell_control);
3746
3747                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3748                 ring->wptr = 0;
3749                 mqd->cp_hqd_pq_wptr = ring->wptr;
3750                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3751                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3752
3753                 /* set the vmid for the queue */
3754                 mqd->cp_hqd_vmid = 0;
3755                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3756
3757                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
3758                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3759                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
3760                 mqd->cp_hqd_persistent_state = tmp;
3761                 if (adev->asic_type == CHIP_STONEY) {
3762                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
3763                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
3764                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
3765                 }
3766
3767                 /* activate the queue */
3768                 mqd->cp_hqd_active = 1;
3769                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
3770
3771                 vi_srbm_select(adev, 0, 0, 0, 0);
3772                 mutex_unlock(&adev->srbm_mutex);
3773
3774                 amdgpu_bo_kunmap(ring->mqd_obj);
3775                 amdgpu_bo_unreserve(ring->mqd_obj);
3776         }
3777
3778         if (use_doorbell) {
3779                 tmp = RREG32(mmCP_PQ_STATUS);
3780                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3781                 WREG32(mmCP_PQ_STATUS, tmp);
3782         }
3783
3784         r = gfx_v8_0_cp_compute_start(adev);
3785         if (r)
3786                 return r;
3787
3788         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3789                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3790
3791                 ring->ready = true;
3792                 r = amdgpu_ring_test_ring(ring);
3793                 if (r)
3794                         ring->ready = false;
3795         }
3796
3797         return 0;
3798 }
3799
3800 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3801 {
3802         int r;
3803
3804         if (!(adev->flags & AMD_IS_APU))
3805                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3806
3807         if (!adev->pp_enabled) {
3808                 if (!adev->firmware.smu_load) {
3809                         /* legacy firmware loading */
3810                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
3811                         if (r)
3812                                 return r;
3813
3814                         r = gfx_v8_0_cp_compute_load_microcode(adev);
3815                         if (r)
3816                                 return r;
3817                 } else {
3818                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3819                                                         AMDGPU_UCODE_ID_CP_CE);
3820                         if (r)
3821                                 return -EINVAL;
3822
3823                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3824                                                         AMDGPU_UCODE_ID_CP_PFP);
3825                         if (r)
3826                                 return -EINVAL;
3827
3828                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3829                                                         AMDGPU_UCODE_ID_CP_ME);
3830                         if (r)
3831                                 return -EINVAL;
3832
3833                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3834                                                         AMDGPU_UCODE_ID_CP_MEC1);
3835                         if (r)
3836                                 return -EINVAL;
3837                 }
3838         }
3839
3840         r = gfx_v8_0_cp_gfx_resume(adev);
3841         if (r)
3842                 return r;
3843
3844         r = gfx_v8_0_cp_compute_resume(adev);
3845         if (r)
3846                 return r;
3847
3848         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3849
3850         return 0;
3851 }
3852
3853 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
3854 {
3855         gfx_v8_0_cp_gfx_enable(adev, enable);
3856         gfx_v8_0_cp_compute_enable(adev, enable);
3857 }
3858
/**
 * gfx_v8_0_hw_init - hardware init callback for the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Applies the golden register settings, runs the GPU init, then resumes
 * the RLC followed by the CP.
 *
 * Returns 0 on success or the error from the RLC or CP resume.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret;

	gfx_v8_0_init_golden_registers(adev);

	gfx_v8_0_gpu_init(adev);

	ret = gfx_v8_0_rlc_resume(adev);
	if (ret)
		return ret;

	/* the CP resume result is the overall result */
	return gfx_v8_0_cp_resume(adev);
}
3878
3879 static int gfx_v8_0_hw_fini(void *handle)
3880 {
3881         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3882
3883         gfx_v8_0_cp_enable(adev, false);
3884         gfx_v8_0_rlc_stop(adev);
3885         gfx_v8_0_cp_compute_fini(adev);
3886
3887         return 0;
3888 }
3889
/**
 * gfx_v8_0_suspend - suspend callback for the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Suspend is implemented as a plain hardware teardown.
 *
 * Returns the result of gfx_v8_0_hw_fini().
 */
static int gfx_v8_0_suspend(void *handle)
{
	/* the handle is the device pointer; forward it unchanged */
	return gfx_v8_0_hw_fini(handle);
}
3896
/**
 * gfx_v8_0_resume - resume callback for the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Resume is implemented as a plain hardware init.
 *
 * Returns the result of gfx_v8_0_hw_init().
 */
static int gfx_v8_0_resume(void *handle)
{
	/* the handle is the device pointer; forward it unchanged */
	return gfx_v8_0_hw_init(handle);
}
3903
3904 static bool gfx_v8_0_is_idle(void *handle)
3905 {
3906         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3907
3908         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3909                 return false;
3910         else
3911                 return true;
3912 }
3913
3914 static int gfx_v8_0_wait_for_idle(void *handle)
3915 {
3916         unsigned i;
3917         u32 tmp;
3918         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3919
3920         for (i = 0; i < adev->usec_timeout; i++) {
3921                 /* read MC_STATUS */
3922                 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3923
3924                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3925                         return 0;
3926                 udelay(1);
3927         }
3928         return -ETIMEDOUT;
3929 }
3930
3931 static void gfx_v8_0_print_status(void *handle)
3932 {
3933         int i;
3934         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3935
3936         dev_info(adev->dev, "GFX 8.x registers\n");
3937         dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
3938                  RREG32(mmGRBM_STATUS));
3939         dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
3940                  RREG32(mmGRBM_STATUS2));
3941         dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3942                  RREG32(mmGRBM_STATUS_SE0));
3943         dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3944                  RREG32(mmGRBM_STATUS_SE1));
3945         dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3946                  RREG32(mmGRBM_STATUS_SE2));
3947         dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3948                  RREG32(mmGRBM_STATUS_SE3));
3949         dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
3950         dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3951                  RREG32(mmCP_STALLED_STAT1));
3952         dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3953                  RREG32(mmCP_STALLED_STAT2));
3954         dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3955                  RREG32(mmCP_STALLED_STAT3));
3956         dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3957                  RREG32(mmCP_CPF_BUSY_STAT));
3958         dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3959                  RREG32(mmCP_CPF_STALLED_STAT1));
3960         dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
3961         dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
3962         dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3963                  RREG32(mmCP_CPC_STALLED_STAT1));
3964         dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
3965
3966         for (i = 0; i < 32; i++) {
3967                 dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
3968                          i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
3969         }
3970         for (i = 0; i < 16; i++) {
3971                 dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
3972                          i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
3973         }
3974         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3975                 dev_info(adev->dev, "  se: %d\n", i);
3976                 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
3977                 dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
3978                          RREG32(mmPA_SC_RASTER_CONFIG));
3979                 dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
3980                          RREG32(mmPA_SC_RASTER_CONFIG_1));
3981         }
3982         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3983
3984         dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
3985                  RREG32(mmGB_ADDR_CONFIG));
3986         dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
3987                  RREG32(mmHDP_ADDR_CONFIG));
3988         dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
3989                  RREG32(mmDMIF_ADDR_CALC));
3990         dev_info(adev->dev, "  SDMA0_TILING_CONFIG=0x%08X\n",
3991                  RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
3992         dev_info(adev->dev, "  SDMA1_TILING_CONFIG=0x%08X\n",
3993                  RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
3994         dev_info(adev->dev, "  UVD_UDEC_ADDR_CONFIG=0x%08X\n",
3995                  RREG32(mmUVD_UDEC_ADDR_CONFIG));
3996         dev_info(adev->dev, "  UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
3997                  RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
3998         dev_info(adev->dev, "  UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
3999                  RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
4000
4001         dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
4002                  RREG32(mmCP_MEQ_THRESHOLDS));
4003         dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
4004                  RREG32(mmSX_DEBUG_1));
4005         dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
4006                  RREG32(mmTA_CNTL_AUX));
4007         dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
4008                  RREG32(mmSPI_CONFIG_CNTL));
4009         dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
4010                  RREG32(mmSQ_CONFIG));
4011         dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
4012                  RREG32(mmDB_DEBUG));
4013         dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
4014                  RREG32(mmDB_DEBUG2));
4015         dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
4016                  RREG32(mmDB_DEBUG3));
4017         dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
4018                  RREG32(mmCB_HW_CONTROL));
4019         dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
4020                  RREG32(mmSPI_CONFIG_CNTL_1));
4021         dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
4022                  RREG32(mmPA_SC_FIFO_SIZE));
4023         dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
4024                  RREG32(mmVGT_NUM_INSTANCES));
4025         dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
4026                  RREG32(mmCP_PERFMON_CNTL));
4027         dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
4028                  RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
4029         dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
4030                  RREG32(mmVGT_CACHE_INVALIDATION));
4031         dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
4032                  RREG32(mmVGT_GS_VERTEX_REUSE));
4033         dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
4034                  RREG32(mmPA_SC_LINE_STIPPLE_STATE));
4035         dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
4036                  RREG32(mmPA_CL_ENHANCE));
4037         dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
4038                  RREG32(mmPA_SC_ENHANCE));
4039
4040         dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
4041                  RREG32(mmCP_ME_CNTL));
4042         dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
4043                  RREG32(mmCP_MAX_CONTEXT));
4044         dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
4045                  RREG32(mmCP_ENDIAN_SWAP));
4046         dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
4047                  RREG32(mmCP_DEVICE_ID));
4048
4049         dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
4050                  RREG32(mmCP_SEM_WAIT_TIMER));
4051
4052         dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
4053                  RREG32(mmCP_RB_WPTR_DELAY));
4054         dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
4055                  RREG32(mmCP_RB_VMID));
4056         dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4057                  RREG32(mmCP_RB0_CNTL));
4058         dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
4059                  RREG32(mmCP_RB0_WPTR));
4060         dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
4061                  RREG32(mmCP_RB0_RPTR_ADDR));
4062         dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4063                  RREG32(mmCP_RB0_RPTR_ADDR_HI));
4064         dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4065                  RREG32(mmCP_RB0_CNTL));
4066         dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
4067                  RREG32(mmCP_RB0_BASE));
4068         dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
4069                  RREG32(mmCP_RB0_BASE_HI));
4070         dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
4071                  RREG32(mmCP_MEC_CNTL));
4072         dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
4073                  RREG32(mmCP_CPF_DEBUG));
4074
4075         dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
4076                  RREG32(mmSCRATCH_ADDR));
4077         dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
4078                  RREG32(mmSCRATCH_UMSK));
4079
4080         dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
4081                  RREG32(mmCP_INT_CNTL_RING0));
4082         dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4083                  RREG32(mmRLC_LB_CNTL));
4084         dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
4085                  RREG32(mmRLC_CNTL));
4086         dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
4087                  RREG32(mmRLC_CGCG_CGLS_CTRL));
4088         dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
4089                  RREG32(mmRLC_LB_CNTR_INIT));
4090         dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
4091                  RREG32(mmRLC_LB_CNTR_MAX));
4092         dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
4093                  RREG32(mmRLC_LB_INIT_CU_MASK));
4094         dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
4095                  RREG32(mmRLC_LB_PARAMS));
4096         dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4097                  RREG32(mmRLC_LB_CNTL));
4098         dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
4099                  RREG32(mmRLC_MC_CNTL));
4100         dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
4101                  RREG32(mmRLC_UCODE_CNTL));
4102
4103         mutex_lock(&adev->srbm_mutex);
4104         for (i = 0; i < 16; i++) {
4105                 vi_srbm_select(adev, 0, 0, 0, i);
4106                 dev_info(adev->dev, "  VM %d:\n", i);
4107                 dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
4108                          RREG32(mmSH_MEM_CONFIG));
4109                 dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
4110                          RREG32(mmSH_MEM_APE1_BASE));
4111                 dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
4112                          RREG32(mmSH_MEM_APE1_LIMIT));
4113                 dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
4114                          RREG32(mmSH_MEM_BASES));
4115         }
4116         vi_srbm_select(adev, 0, 0, 0, 0);
4117         mutex_unlock(&adev->srbm_mutex);
4118 }
4119
4120 static int gfx_v8_0_soft_reset(void *handle)
4121 {
4122         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4123         u32 tmp;
4124         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4125
4126         /* GRBM_STATUS */
4127         tmp = RREG32(mmGRBM_STATUS);
4128         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4129                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4130                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4131                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4132                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4133                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4134                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4135                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4136                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4137                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4138         }
4139
4140         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4141                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4142                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4143                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4144                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4145         }
4146
4147         /* GRBM_STATUS2 */
4148         tmp = RREG32(mmGRBM_STATUS2);
4149         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4150                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4151                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4152
4153         /* SRBM_STATUS */
4154         tmp = RREG32(mmSRBM_STATUS);
4155         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4156                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4157                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4158
4159         if (grbm_soft_reset || srbm_soft_reset) {
4160                 gfx_v8_0_print_status((void *)adev);
4161                 /* stop the rlc */
4162                 gfx_v8_0_rlc_stop(adev);
4163
4164                 /* Disable GFX parsing/prefetching */
4165                 gfx_v8_0_cp_gfx_enable(adev, false);
4166
4167                 /* Disable MEC parsing/prefetching */
4168                 /* XXX todo */
4169
4170                 if (grbm_soft_reset) {
4171                         tmp = RREG32(mmGRBM_SOFT_RESET);
4172                         tmp |= grbm_soft_reset;
4173                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4174                         WREG32(mmGRBM_SOFT_RESET, tmp);
4175                         tmp = RREG32(mmGRBM_SOFT_RESET);
4176
4177                         udelay(50);
4178
4179                         tmp &= ~grbm_soft_reset;
4180                         WREG32(mmGRBM_SOFT_RESET, tmp);
4181                         tmp = RREG32(mmGRBM_SOFT_RESET);
4182                 }
4183
4184                 if (srbm_soft_reset) {
4185                         tmp = RREG32(mmSRBM_SOFT_RESET);
4186                         tmp |= srbm_soft_reset;
4187                         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4188                         WREG32(mmSRBM_SOFT_RESET, tmp);
4189                         tmp = RREG32(mmSRBM_SOFT_RESET);
4190
4191                         udelay(50);
4192
4193                         tmp &= ~srbm_soft_reset;
4194                         WREG32(mmSRBM_SOFT_RESET, tmp);
4195                         tmp = RREG32(mmSRBM_SOFT_RESET);
4196                 }
4197                 /* Wait a little for things to settle down */
4198                 udelay(50);
4199                 gfx_v8_0_print_status((void *)adev);
4200         }
4201         return 0;
4202 }
4203
4204 /**
4205  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4206  *
4207  * @adev: amdgpu_device pointer
4208  *
4209  * Fetches a GPU clock counter snapshot.
4210  * Returns the 64 bit clock counter snapshot.
4211  */
4212 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4213 {
4214         uint64_t clock;
4215
4216         mutex_lock(&adev->gfx.gpu_clock_mutex);
4217         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4218         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4219                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4220         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4221         return clock;
4222 }
4223
4224 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4225                                           uint32_t vmid,
4226                                           uint32_t gds_base, uint32_t gds_size,
4227                                           uint32_t gws_base, uint32_t gws_size,
4228                                           uint32_t oa_base, uint32_t oa_size)
4229 {
4230         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4231         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4232
4233         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4234         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4235
4236         oa_base = oa_base >> AMDGPU_OA_SHIFT;
4237         oa_size = oa_size >> AMDGPU_OA_SHIFT;
4238
4239         /* GDS Base */
4240         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4241         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4242                                 WRITE_DATA_DST_SEL(0)));
4243         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4244         amdgpu_ring_write(ring, 0);
4245         amdgpu_ring_write(ring, gds_base);
4246
4247         /* GDS Size */
4248         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4249         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4250                                 WRITE_DATA_DST_SEL(0)));
4251         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4252         amdgpu_ring_write(ring, 0);
4253         amdgpu_ring_write(ring, gds_size);
4254
4255         /* GWS */
4256         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4257         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4258                                 WRITE_DATA_DST_SEL(0)));
4259         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4260         amdgpu_ring_write(ring, 0);
4261         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4262
4263         /* OA */
4264         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4265         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4266                                 WRITE_DATA_DST_SEL(0)));
4267         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4268         amdgpu_ring_write(ring, 0);
4269         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4270 }
4271
4272 static int gfx_v8_0_early_init(void *handle)
4273 {
4274         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4275
4276         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
4277         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
4278         gfx_v8_0_set_ring_funcs(adev);
4279         gfx_v8_0_set_irq_funcs(adev);
4280         gfx_v8_0_set_gds_init(adev);
4281
4282         return 0;
4283 }
4284
/*
 * Late init: run the EDC GPR workarounds.  @handle is the amdgpu_device.
 * Returns the result of the workaround call (0 on success).
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = handle;

	/* requires IBs so do in late init after IB pool is initialized */
	return gfx_v8_0_do_edc_gpr_workarounds(adev);
}
4297
4298 static int gfx_v8_0_set_powergating_state(void *handle,
4299                                           enum amd_powergating_state state)
4300 {
4301         return 0;
4302 }
4303
4304 static int gfx_v8_0_set_clockgating_state(void *handle,
4305                                           enum amd_clockgating_state state)
4306 {
4307         return 0;
4308 }
4309
4310 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4311 {
4312         u32 rptr;
4313
4314         rptr = ring->adev->wb.wb[ring->rptr_offs];
4315
4316         return rptr;
4317 }
4318
4319 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4320 {
4321         struct amdgpu_device *adev = ring->adev;
4322         u32 wptr;
4323
4324         if (ring->use_doorbell)
4325                 /* XXX check if swapping is necessary on BE */
4326                 wptr = ring->adev->wb.wb[ring->wptr_offs];
4327         else
4328                 wptr = RREG32(mmCP_RB0_WPTR);
4329
4330         return wptr;
4331 }
4332
4333 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4334 {
4335         struct amdgpu_device *adev = ring->adev;
4336
4337         if (ring->use_doorbell) {
4338                 /* XXX check if swapping is necessary on BE */
4339                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
4340                 WDOORBELL32(ring->doorbell_index, ring->wptr);
4341         } else {
4342                 WREG32(mmCP_RB0_WPTR, ring->wptr);
4343                 (void)RREG32(mmCP_RB0_WPTR);
4344         }
4345 }
4346
4347 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4348 {
4349         u32 ref_and_mask, reg_mem_engine;
4350
4351         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
4352                 switch (ring->me) {
4353                 case 1:
4354                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
4355                         break;
4356                 case 2:
4357                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
4358                         break;
4359                 default:
4360                         return;
4361                 }
4362                 reg_mem_engine = 0;
4363         } else {
4364                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
4365                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
4366         }
4367
4368         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4369         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
4370                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
4371                                  reg_mem_engine));
4372         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
4373         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
4374         amdgpu_ring_write(ring, ref_and_mask);
4375         amdgpu_ring_write(ring, ref_and_mask);
4376         amdgpu_ring_write(ring, 0x20); /* poll interval */
4377 }
4378
4379 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4380                                   struct amdgpu_ib *ib)
4381 {
4382         bool need_ctx_switch = ring->current_ctx != ib->ctx;
4383         u32 header, control = 0;
4384         u32 next_rptr = ring->wptr + 5;
4385
4386         /* drop the CE preamble IB for the same context */
4387         if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
4388                 return;
4389
4390         if (need_ctx_switch)
4391                 next_rptr += 2;
4392
4393         next_rptr += 4;
4394         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4395         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4396         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4397         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4398         amdgpu_ring_write(ring, next_rptr);
4399
4400         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
4401         if (need_ctx_switch) {
4402                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4403                 amdgpu_ring_write(ring, 0);
4404         }
4405
4406         if (ib->flags & AMDGPU_IB_FLAG_CE)
4407                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4408         else
4409                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4410
4411         control |= ib->length_dw |
4412                 (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
4413
4414         amdgpu_ring_write(ring, header);
4415         amdgpu_ring_write(ring,
4416 #ifdef __BIG_ENDIAN
4417                           (2 << 0) |
4418 #endif
4419                           (ib->gpu_addr & 0xFFFFFFFC));
4420         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4421         amdgpu_ring_write(ring, control);
4422 }
4423
4424 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4425                                   struct amdgpu_ib *ib)
4426 {
4427         u32 header, control = 0;
4428         u32 next_rptr = ring->wptr + 5;
4429
4430         control |= INDIRECT_BUFFER_VALID;
4431
4432         next_rptr += 4;
4433         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4434         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4435         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4436         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4437         amdgpu_ring_write(ring, next_rptr);
4438
4439         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4440
4441         control |= ib->length_dw |
4442                            (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
4443
4444         amdgpu_ring_write(ring, header);
4445         amdgpu_ring_write(ring,
4446 #ifdef __BIG_ENDIAN
4447                                           (2 << 0) |
4448 #endif
4449                                           (ib->gpu_addr & 0xFFFFFFFC));
4450         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4451         amdgpu_ring_write(ring, control);
4452 }
4453
4454 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
4455                                          u64 seq, unsigned flags)
4456 {
4457         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4458         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4459
4460         /* EVENT_WRITE_EOP - flush caches, send int */
4461         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
4462         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4463                                  EOP_TC_ACTION_EN |
4464                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4465                                  EVENT_INDEX(5)));
4466         amdgpu_ring_write(ring, addr & 0xfffffffc);
4467         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
4468                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4469         amdgpu_ring_write(ring, lower_32_bits(seq));
4470         amdgpu_ring_write(ring, upper_32_bits(seq));
4471
4472 }
4473
4474 /**
4475  * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring
4476  *
4477  * @ring: amdgpu ring buffer object
4478  * @semaphore: amdgpu semaphore object
4479  * @emit_wait: Is this a sempahore wait?
4480  *
4481  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4482  * from running ahead of semaphore waits.
4483  */
4484 static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
4485                                          struct amdgpu_semaphore *semaphore,
4486                                          bool emit_wait)
4487 {
4488         uint64_t addr = semaphore->gpu_addr;
4489         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4490
4491         if (ring->adev->asic_type == CHIP_TOPAZ ||
4492             ring->adev->asic_type == CHIP_TONGA ||
4493             ring->adev->asic_type == CHIP_FIJI)
4494                 /* we got a hw semaphore bug in VI TONGA, return false to switch back to sw fence wait */
4495                 return false;
4496         else {
4497                 amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 2));
4498                 amdgpu_ring_write(ring, lower_32_bits(addr));
4499                 amdgpu_ring_write(ring, upper_32_bits(addr));
4500                 amdgpu_ring_write(ring, sel);
4501         }
4502
4503         if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
4504                 /* Prevent the PFP from running ahead of the semaphore wait */
4505                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4506                 amdgpu_ring_write(ring, 0x0);
4507         }
4508
4509         return true;
4510 }
4511
4512 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4513                                         unsigned vm_id, uint64_t pd_addr)
4514 {
4515         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
4516         uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
4517         uint64_t addr = ring->fence_drv.gpu_addr;
4518
4519         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4520         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
4521                  WAIT_REG_MEM_FUNCTION(3))); /* equal */
4522         amdgpu_ring_write(ring, addr & 0xfffffffc);
4523         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4524         amdgpu_ring_write(ring, seq);
4525         amdgpu_ring_write(ring, 0xffffffff);
4526         amdgpu_ring_write(ring, 4); /* poll interval */
4527
4528         if (usepfp) {
4529                 /* synce CE with ME to prevent CE fetch CEIB before context switch done */
4530                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4531                 amdgpu_ring_write(ring, 0);
4532                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4533                 amdgpu_ring_write(ring, 0);
4534         }
4535
4536         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4537         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
4538                                  WRITE_DATA_DST_SEL(0)) |
4539                                  WR_CONFIRM);
4540         if (vm_id < 8) {
4541                 amdgpu_ring_write(ring,
4542                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
4543         } else {
4544                 amdgpu_ring_write(ring,
4545                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
4546         }
4547         amdgpu_ring_write(ring, 0);
4548         amdgpu_ring_write(ring, pd_addr >> 12);
4549
4550         /* bits 0-15 are the VM contexts0-15 */
4551         /* invalidate the cache */
4552         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4553         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4554                                  WRITE_DATA_DST_SEL(0)));
4555         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4556         amdgpu_ring_write(ring, 0);
4557         amdgpu_ring_write(ring, 1 << vm_id);
4558
4559         /* wait for the invalidate to complete */
4560         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4561         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
4562                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
4563                                  WAIT_REG_MEM_ENGINE(0))); /* me */
4564         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4565         amdgpu_ring_write(ring, 0);
4566         amdgpu_ring_write(ring, 0); /* ref */
4567         amdgpu_ring_write(ring, 0); /* mask */
4568         amdgpu_ring_write(ring, 0x20); /* poll interval */
4569
4570         /* compute doesn't have PFP */
4571         if (usepfp) {
4572                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4573                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4574                 amdgpu_ring_write(ring, 0x0);
4575                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4576                 amdgpu_ring_write(ring, 0);
4577                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4578                 amdgpu_ring_write(ring, 0);
4579         }
4580 }
4581
4582 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4583 {
4584         return ring->adev->wb.wb[ring->rptr_offs];
4585 }
4586
4587 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4588 {
4589         return ring->adev->wb.wb[ring->wptr_offs];
4590 }
4591
4592 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4593 {
4594         struct amdgpu_device *adev = ring->adev;
4595
4596         /* XXX check if swapping is necessary on BE */
4597         adev->wb.wb[ring->wptr_offs] = ring->wptr;
4598         WDOORBELL32(ring->doorbell_index, ring->wptr);
4599 }
4600
4601 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
4602                                              u64 addr, u64 seq,
4603                                              unsigned flags)
4604 {
4605         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4606         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4607
4608         /* RELEASE_MEM - flush caches, send int */
4609         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4610         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4611                                  EOP_TC_ACTION_EN |
4612                                  EOP_TC_WB_ACTION_EN |
4613                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4614                                  EVENT_INDEX(5)));
4615         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4616         amdgpu_ring_write(ring, addr & 0xfffffffc);
4617         amdgpu_ring_write(ring, upper_32_bits(addr));
4618         amdgpu_ring_write(ring, lower_32_bits(seq));
4619         amdgpu_ring_write(ring, upper_32_bits(seq));
4620 }
4621
4622 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4623                                                  enum amdgpu_interrupt_state state)
4624 {
4625         u32 cp_int_cntl;
4626
4627         switch (state) {
4628         case AMDGPU_IRQ_STATE_DISABLE:
4629                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4630                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4631                                             TIME_STAMP_INT_ENABLE, 0);
4632                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4633                 break;
4634         case AMDGPU_IRQ_STATE_ENABLE:
4635                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4636                 cp_int_cntl =
4637                         REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4638                                       TIME_STAMP_INT_ENABLE, 1);
4639                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4640                 break;
4641         default:
4642                 break;
4643         }
4644 }
4645
4646 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4647                                                      int me, int pipe,
4648                                                      enum amdgpu_interrupt_state state)
4649 {
4650         u32 mec_int_cntl, mec_int_cntl_reg;
4651
4652         /*
4653          * amdgpu controls only pipe 0 of MEC1. That's why this function only
4654          * handles the setting of interrupts for this specific pipe. All other
4655          * pipes' interrupts are set by amdkfd.
4656          */
4657
4658         if (me == 1) {
4659                 switch (pipe) {
4660                 case 0:
4661                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4662                         break;
4663                 default:
4664                         DRM_DEBUG("invalid pipe %d\n", pipe);
4665                         return;
4666                 }
4667         } else {
4668                 DRM_DEBUG("invalid me %d\n", me);
4669                 return;
4670         }
4671
4672         switch (state) {
4673         case AMDGPU_IRQ_STATE_DISABLE:
4674                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4675                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4676                                              TIME_STAMP_INT_ENABLE, 0);
4677                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4678                 break;
4679         case AMDGPU_IRQ_STATE_ENABLE:
4680                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4681                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4682                                              TIME_STAMP_INT_ENABLE, 1);
4683                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4684                 break;
4685         default:
4686                 break;
4687         }
4688 }
4689
4690 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4691                                              struct amdgpu_irq_src *source,
4692                                              unsigned type,
4693                                              enum amdgpu_interrupt_state state)
4694 {
4695         u32 cp_int_cntl;
4696
4697         switch (state) {
4698         case AMDGPU_IRQ_STATE_DISABLE:
4699                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4700                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4701                                             PRIV_REG_INT_ENABLE, 0);
4702                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4703                 break;
4704         case AMDGPU_IRQ_STATE_ENABLE:
4705                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4706                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4707                                             PRIV_REG_INT_ENABLE, 0);
4708                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4709                 break;
4710         default:
4711                 break;
4712         }
4713
4714         return 0;
4715 }
4716
4717 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4718                                               struct amdgpu_irq_src *source,
4719                                               unsigned type,
4720                                               enum amdgpu_interrupt_state state)
4721 {
4722         u32 cp_int_cntl;
4723
4724         switch (state) {
4725         case AMDGPU_IRQ_STATE_DISABLE:
4726                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4727                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4728                                             PRIV_INSTR_INT_ENABLE, 0);
4729                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4730                 break;
4731         case AMDGPU_IRQ_STATE_ENABLE:
4732                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4733                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4734                                             PRIV_INSTR_INT_ENABLE, 1);
4735                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4736                 break;
4737         default:
4738                 break;
4739         }
4740
4741         return 0;
4742 }
4743
4744 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4745                                             struct amdgpu_irq_src *src,
4746                                             unsigned type,
4747                                             enum amdgpu_interrupt_state state)
4748 {
4749         switch (type) {
4750         case AMDGPU_CP_IRQ_GFX_EOP:
4751                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
4752                 break;
4753         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4754                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4755                 break;
4756         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4757                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4758                 break;
4759         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4760                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4761                 break;
4762         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4763                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4764                 break;
4765         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4766                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4767                 break;
4768         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4769                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4770                 break;
4771         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4772                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4773                 break;
4774         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4775                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4776                 break;
4777         default:
4778                 break;
4779         }
4780         return 0;
4781 }
4782
4783 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
4784                             struct amdgpu_irq_src *source,
4785                             struct amdgpu_iv_entry *entry)
4786 {
4787         int i;
4788         u8 me_id, pipe_id, queue_id;
4789         struct amdgpu_ring *ring;
4790
4791         DRM_DEBUG("IH: CP EOP\n");
4792         me_id = (entry->ring_id & 0x0c) >> 2;
4793         pipe_id = (entry->ring_id & 0x03) >> 0;
4794         queue_id = (entry->ring_id & 0x70) >> 4;
4795
4796         switch (me_id) {
4797         case 0:
4798                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4799                 break;
4800         case 1:
4801         case 2:
4802                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4803                         ring = &adev->gfx.compute_ring[i];
4804                         /* Per-queue interrupt is supported for MEC starting from VI.
4805                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
4806                           */
4807                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
4808                                 amdgpu_fence_process(ring);
4809                 }
4810                 break;
4811         }
4812         return 0;
4813 }
4814
4815 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
4816                                  struct amdgpu_irq_src *source,
4817                                  struct amdgpu_iv_entry *entry)
4818 {
4819         DRM_ERROR("Illegal register access in command stream\n");
4820         schedule_work(&adev->reset_work);
4821         return 0;
4822 }
4823
4824 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
4825                                   struct amdgpu_irq_src *source,
4826                                   struct amdgpu_iv_entry *entry)
4827 {
4828         DRM_ERROR("Illegal instruction in command stream\n");
4829         schedule_work(&adev->reset_work);
4830         return 0;
4831 }
4832
4833 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
4834         .early_init = gfx_v8_0_early_init,
4835         .late_init = gfx_v8_0_late_init,
4836         .sw_init = gfx_v8_0_sw_init,
4837         .sw_fini = gfx_v8_0_sw_fini,
4838         .hw_init = gfx_v8_0_hw_init,
4839         .hw_fini = gfx_v8_0_hw_fini,
4840         .suspend = gfx_v8_0_suspend,
4841         .resume = gfx_v8_0_resume,
4842         .is_idle = gfx_v8_0_is_idle,
4843         .wait_for_idle = gfx_v8_0_wait_for_idle,
4844         .soft_reset = gfx_v8_0_soft_reset,
4845         .print_status = gfx_v8_0_print_status,
4846         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
4847         .set_powergating_state = gfx_v8_0_set_powergating_state,
4848 };
4849
4850 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
4851         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
4852         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
4853         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
4854         .parse_cs = NULL,
4855         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
4856         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
4857         .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
4858         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
4859         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
4860         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
4861         .test_ring = gfx_v8_0_ring_test_ring,
4862         .test_ib = gfx_v8_0_ring_test_ib,
4863         .insert_nop = amdgpu_ring_insert_nop,
4864 };
4865
4866 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
4867         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
4868         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
4869         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
4870         .parse_cs = NULL,
4871         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
4872         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
4873         .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
4874         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
4875         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
4876         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
4877         .test_ring = gfx_v8_0_ring_test_ring,
4878         .test_ib = gfx_v8_0_ring_test_ib,
4879         .insert_nop = amdgpu_ring_insert_nop,
4880 };
4881
4882 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
4883 {
4884         int i;
4885
4886         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4887                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
4888
4889         for (i = 0; i < adev->gfx.num_compute_rings; i++)
4890                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
4891 }
4892
4893 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
4894         .set = gfx_v8_0_set_eop_interrupt_state,
4895         .process = gfx_v8_0_eop_irq,
4896 };
4897
4898 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
4899         .set = gfx_v8_0_set_priv_reg_fault_state,
4900         .process = gfx_v8_0_priv_reg_irq,
4901 };
4902
4903 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
4904         .set = gfx_v8_0_set_priv_inst_fault_state,
4905         .process = gfx_v8_0_priv_inst_irq,
4906 };
4907
4908 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
4909 {
4910         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
4911         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
4912
4913         adev->gfx.priv_reg_irq.num_types = 1;
4914         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
4915
4916         adev->gfx.priv_inst_irq.num_types = 1;
4917         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
4918 }
4919
4920 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
4921 {
4922         /* init asci gds info */
4923         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
4924         adev->gds.gws.total_size = 64;
4925         adev->gds.oa.total_size = 16;
4926
4927         if (adev->gds.mem.total_size == 64 * 1024) {
4928                 adev->gds.mem.gfx_partition_size = 4096;
4929                 adev->gds.mem.cs_partition_size = 4096;
4930
4931                 adev->gds.gws.gfx_partition_size = 4;
4932                 adev->gds.gws.cs_partition_size = 4;
4933
4934                 adev->gds.oa.gfx_partition_size = 4;
4935                 adev->gds.oa.cs_partition_size = 1;
4936         } else {
4937                 adev->gds.mem.gfx_partition_size = 1024;
4938                 adev->gds.mem.cs_partition_size = 1024;
4939
4940                 adev->gds.gws.gfx_partition_size = 16;
4941                 adev->gds.gws.cs_partition_size = 16;
4942
4943                 adev->gds.oa.gfx_partition_size = 4;
4944                 adev->gds.oa.cs_partition_size = 4;
4945         }
4946 }
4947
4948 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev,
4949                 u32 se, u32 sh)
4950 {
4951         u32 mask = 0, tmp, tmp1;
4952         int i;
4953
4954         gfx_v8_0_select_se_sh(adev, se, sh);
4955         tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
4956         tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
4957         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4958
4959         tmp &= 0xffff0000;
4960
4961         tmp |= tmp1;
4962         tmp >>= 16;
4963
4964         for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
4965                 mask <<= 1;
4966                 mask |= 1;
4967         }
4968
4969         return (~tmp) & mask;
4970 }
4971
4972 int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
4973                                                  struct amdgpu_cu_info *cu_info)
4974 {
4975         int i, j, k, counter, active_cu_number = 0;
4976         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
4977
4978         if (!adev || !cu_info)
4979                 return -EINVAL;
4980
4981         mutex_lock(&adev->grbm_idx_mutex);
4982         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
4983                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
4984                         mask = 1;
4985                         ao_bitmap = 0;
4986                         counter = 0;
4987                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j);
4988                         cu_info->bitmap[i][j] = bitmap;
4989
4990                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
4991                                 if (bitmap & mask) {
4992                                         if (counter < 2)
4993                                                 ao_bitmap |= mask;
4994                                         counter ++;
4995                                 }
4996                                 mask <<= 1;
4997                         }
4998                         active_cu_number += counter;
4999                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5000                 }
5001         }
5002
5003         cu_info->number = active_cu_number;
5004         cu_info->ao_cu_mask = ao_cu_mask;
5005         mutex_unlock(&adev->grbm_idx_mutex);
5006         return 0;
5007 }