drm/amdgpu/gfx8: properly disable the KCQs in hw_fini
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)			((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)			((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)			((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)		((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)			((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)			((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)			((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)		((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)			((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

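/*
 * Golden register tables: each entry below is a {register, AND mask,
 * OR value} triplet consumed by amdgpu_program_register_sequence(),
 * which does a read-modify-write of the register (the bits in the AND
 * mask are cleared, then the OR value is applied).
 */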
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);

static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

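/*
 * CP scratch registers: seven registers starting at mmSCRATCH_REG0 are
 * handed out via the free_mask bitmap; the ring and IB tests below each
 * borrow one to verify that CP packets actually execute.
 */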
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

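/*
 * Unlike the ring test above, which writes packets directly to the
 * ring, the IB test submits a small indirect buffer and waits on the
 * returned fence (with a timeout) before checking the scratch register.
 */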
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

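/*
 * Fetch and validate the PFP, ME, CE, RLC, MEC (and, where present,
 * MEC2) microcode images, named amdgpu/<chip>_<block>.bin, and cache
 * the version/feature fields from their headers.
 */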
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);

	/* Chained IB ucode hasn't been formally released yet, so disable
	 * the feature for now.
	 * TODO: once the ucode is ready, use the ucode version to judge
	 * whether chained IBs are supported.
	 */
	adev->virt.chained_ib_support = false;

	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the JT (jump table) size */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}

	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

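/*
 * Copy the CP jump tables (CE, PFP, ME, MEC and, on Carrizo, MEC2) out
 * of the ucode images into the RLC cp_table buffer, packed back to back.
 */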
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

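/*
 * The RLC setup below manages two pinned VRAM buffers: the clear state
 * block filled by gfx_v8_0_get_csb_buffer() and, on Carrizo/Stoney, the
 * CP jump table filled by cz_init_cp_jump_table().
 */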
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}

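/*
 * The KIQ (kernel interface queue) ring is doorbell driven; it runs on
 * MEC2 pipe 0 when MEC2 firmware is available, otherwise on MEC1 pipe 1.
 */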
4e638ae9
XY
1376static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
1377 struct amdgpu_ring *ring,
1378 struct amdgpu_irq_src *irq)
1379{
1380 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1381 int r = 0;
1382
1383 r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
1384 if (r)
1385 return r;
1386
1387 ring->adev = NULL;
1388 ring->ring_obj = NULL;
1389 ring->use_doorbell = true;
1390 ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
1391 if (adev->gfx.mec2_fw) {
1392 ring->me = 2;
1393 ring->pipe = 0;
1394 } else {
1395 ring->me = 1;
1396 ring->pipe = 1;
1397 }
1398
1399 ring->queue = 0;
1400 ring->eop_gpu_addr = kiq->eop_gpu_addr;
1401 sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
1402 r = amdgpu_ring_init(adev, ring, 1024,
1403 irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
1404 if (r)
1405 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
1406
1407 return r;
1408}
1409static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
1410 struct amdgpu_irq_src *irq)
1411{
1412 amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
1413 amdgpu_ring_fini(ring);
1414}
1415
1416#define MEC_HPD_SIZE 2048
1417
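/*
 * Allocate one pinned GTT bo holding a MEC_HPD_SIZE (2KB) EOP buffer
 * for each of the 8 queues on the single MEC pipe the driver owns,
 * and zero it before the per-queue addresses are handed out to the
 * compute rings in sw_init.
 */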
1418static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1419{
1420 int r;
1421 u32 *hpd;
1422
1423 /*
1424 * we assign only 1 pipe because all other pipes will
1425 * be handled by KFD
1426 */
1427 adev->gfx.mec.num_mec = 1;
1428 adev->gfx.mec.num_pipe = 1;
1429 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1430
1431 if (adev->gfx.mec.hpd_eop_obj == NULL) {
1432 r = amdgpu_bo_create(adev,
1433 adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1434 PAGE_SIZE, true,
1435 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1436 &adev->gfx.mec.hpd_eop_obj);
1437 if (r) {
1438 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1439 return r;
1440 }
1441 }
1442
1443 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1444 if (unlikely(r != 0)) {
1445 gfx_v8_0_mec_fini(adev);
1446 return r;
1447 }
1448 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1449 &adev->gfx.mec.hpd_eop_gpu_addr);
1450 if (r) {
1451 dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
1452 gfx_v8_0_mec_fini(adev);
1453 return r;
1454 }
1455 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1456 if (r) {
1457 dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
1458 gfx_v8_0_mec_fini(adev);
1459 return r;
1460 }
1461
1462 memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1463
1464 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1465 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1466
1467 return 0;
1468}
1469
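/*
 * The KIQ EOP buffer uses the amdgpu_bo_create_kernel() helper, which
 * creates, pins and maps the bo in one call; amdgpu_bo_free_kernel()
 * in gfx_v8_0_kiq_fini() undoes all three steps.
 */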
1470static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
1471{
1472 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1473
1474 amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
1475}
1476
1477static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
1478{
1479 int r;
1480 u32 *hpd;
1481 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1482
1483 r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
1484 AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
1485 &kiq->eop_gpu_addr, (void **)&hpd);
1486 if (r) {
1487 dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
1488 return r;
1489 }
1490
1491 memset(hpd, 0, MEC_HPD_SIZE);
1492
1493 r = amdgpu_bo_reserve(kiq->eop_obj, true);
1494 if (unlikely(r != 0))
1495 dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
1496 amdgpu_bo_kunmap(kiq->eop_obj);
1497 amdgpu_bo_unreserve(kiq->eop_obj);
1498
1499 return 0;
1500}
1501
1502static const u32 vgpr_init_compute_shader[] =
1503{
1504 0x7e000209, 0x7e020208,
1505 0x7e040207, 0x7e060206,
1506 0x7e080205, 0x7e0a0204,
1507 0x7e0c0203, 0x7e0e0202,
1508 0x7e100201, 0x7e120200,
1509 0x7e140209, 0x7e160208,
1510 0x7e180207, 0x7e1a0206,
1511 0x7e1c0205, 0x7e1e0204,
1512 0x7e200203, 0x7e220202,
1513 0x7e240201, 0x7e260200,
1514 0x7e280209, 0x7e2a0208,
1515 0x7e2c0207, 0x7e2e0206,
1516 0x7e300205, 0x7e320204,
1517 0x7e340203, 0x7e360202,
1518 0x7e380201, 0x7e3a0200,
1519 0x7e3c0209, 0x7e3e0208,
1520 0x7e400207, 0x7e420206,
1521 0x7e440205, 0x7e460204,
1522 0x7e480203, 0x7e4a0202,
1523 0x7e4c0201, 0x7e4e0200,
1524 0x7e500209, 0x7e520208,
1525 0x7e540207, 0x7e560206,
1526 0x7e580205, 0x7e5a0204,
1527 0x7e5c0203, 0x7e5e0202,
1528 0x7e600201, 0x7e620200,
1529 0x7e640209, 0x7e660208,
1530 0x7e680207, 0x7e6a0206,
1531 0x7e6c0205, 0x7e6e0204,
1532 0x7e700203, 0x7e720202,
1533 0x7e740201, 0x7e760200,
1534 0x7e780209, 0x7e7a0208,
1535 0x7e7c0207, 0x7e7e0206,
1536 0xbf8a0000, 0xbf810000,
1537};
1538
1539static const u32 sgpr_init_compute_shader[] =
1540{
1541 0xbe8a0100, 0xbe8c0102,
1542 0xbe8e0104, 0xbe900106,
1543 0xbe920108, 0xbe940100,
1544 0xbe960102, 0xbe980104,
1545 0xbe9a0106, 0xbe9c0108,
1546 0xbe9e0100, 0xbea00102,
1547 0xbea20104, 0xbea40106,
1548 0xbea60108, 0xbea80100,
1549 0xbeaa0102, 0xbeac0104,
1550 0xbeae0106, 0xbeb00108,
1551 0xbeb20100, 0xbeb40102,
1552 0xbeb60104, 0xbeb80106,
1553 0xbeba0108, 0xbebc0100,
1554 0xbebe0102, 0xbec00104,
1555 0xbec20106, 0xbec40108,
1556 0xbec60100, 0xbec80102,
1557 0xbee60004, 0xbee70005,
1558 0xbeea0006, 0xbeeb0007,
1559 0xbee80008, 0xbee90009,
1560 0xbefc0000, 0xbf8a0000,
1561 0xbf810000, 0x00000000,
1562};
1563
1564static const u32 vgpr_init_regs[] =
1565{
1566 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1567 mmCOMPUTE_RESOURCE_LIMITS, 0,
1568 mmCOMPUTE_NUM_THREAD_X, 256*4,
1569 mmCOMPUTE_NUM_THREAD_Y, 1,
1570 mmCOMPUTE_NUM_THREAD_Z, 1,
1571 mmCOMPUTE_PGM_RSRC2, 20,
1572 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1573 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1574 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1575 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1576 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1577 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1578 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1579 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1580 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1581 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1582};
1583
1584static const u32 sgpr1_init_regs[] =
1585{
1586 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1587 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1588 mmCOMPUTE_NUM_THREAD_X, 256*5,
1589 mmCOMPUTE_NUM_THREAD_Y, 1,
1590 mmCOMPUTE_NUM_THREAD_Z, 1,
1591 mmCOMPUTE_PGM_RSRC2, 20,
1592 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1593 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1594 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1595 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1596 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1597 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1598 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1599 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1600 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1601 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1602};
1603
1604static const u32 sgpr2_init_regs[] =
1605{
1606 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1607 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1608 mmCOMPUTE_NUM_THREAD_X, 256*5,
1609 mmCOMPUTE_NUM_THREAD_Y, 1,
1610 mmCOMPUTE_NUM_THREAD_Z, 1,
1611 mmCOMPUTE_PGM_RSRC2, 20,
1612 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1613 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1614 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1615 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1616 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1617 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1618 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1619 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1620 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1621 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1622};
1623
1624static const u32 sec_ded_counter_registers[] =
1625{
1626 mmCPC_EDC_ATC_CNT,
1627 mmCPC_EDC_SCRATCH_CNT,
1628 mmCPC_EDC_UCODE_CNT,
1629 mmCPF_EDC_ATC_CNT,
1630 mmCPF_EDC_ROQ_CNT,
1631 mmCPF_EDC_TAG_CNT,
1632 mmCPG_EDC_ATC_CNT,
1633 mmCPG_EDC_DMA_CNT,
1634 mmCPG_EDC_TAG_CNT,
1635 mmDC_EDC_CSINVOC_CNT,
1636 mmDC_EDC_RESTORE_CNT,
1637 mmDC_EDC_STATE_CNT,
1638 mmGDS_EDC_CNT,
1639 mmGDS_EDC_GRBM_CNT,
1640 mmGDS_EDC_OA_DED,
1641 mmSPI_EDC_CNT,
1642 mmSQC_ATC_EDC_GATCL1_CNT,
1643 mmSQC_EDC_CNT,
1644 mmSQ_EDC_DED_CNT,
1645 mmSQ_EDC_INFO,
1646 mmSQ_EDC_SEC_CNT,
1647 mmTCC_EDC_CNT,
1648 mmTCP_ATC_EDC_GATCL1_CNT,
1649 mmTCP_EDC_CNT,
1650 mmTD_EDC_CNT
1651};
1652
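/*
 * EDC GPR workaround, only needed on Carrizo: dispatch the VGPR and
 * SGPR init shaders above so that every GPR is written once (priming
 * the EDC error-detection state of the register files), then enable
 * DED/FED propagation in GB_EDC_MODE and read the SEC/DED counters
 * back to clear them.
 */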
1653static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1654{
1655 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1656 struct amdgpu_ib ib;
1657 struct dma_fence *f = NULL;
1658 int r, i;
1659 u32 tmp;
1660 unsigned total_size, vgpr_offset, sgpr_offset;
1661 u64 gpu_addr;
1662
1663 /* only supported on CZ */
1664 if (adev->asic_type != CHIP_CARRIZO)
1665 return 0;
1666
1667 /* bail if the compute ring is not ready */
1668 if (!ring->ready)
1669 return 0;
1670
1671 tmp = RREG32(mmGB_EDC_MODE);
1672 WREG32(mmGB_EDC_MODE, 0);
1673
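/*
 * IB sizing: each register pair below costs 3 dwords (SET_SH_REG
 * header, register offset, value); each dispatch adds 4 dwords for
 * the COMPUTE_PGM_LO/HI write, 5 for DISPATCH_DIRECT and 2 for the
 * CS partial flush EVENT_WRITE, all times 4 bytes per dword.
 */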
1674 total_size =
1675 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1676 total_size +=
1677 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1678 total_size +=
1679 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1680 total_size = ALIGN(total_size, 256);
1681 vgpr_offset = total_size;
1682 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1683 sgpr_offset = total_size;
1684 total_size += sizeof(sgpr_init_compute_shader);
1685
1686 /* allocate an indirect buffer to put the commands in */
1687 memset(&ib, 0, sizeof(ib));
1688 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1689 if (r) {
1690 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1691 return r;
1692 }
1693
1694 /* load the compute shaders */
1695 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1696 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1697
1698 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1699 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1700
1701 /* init the ib length to 0 */
1702 ib.length_dw = 0;
1703
1704 /* VGPR */
1705 /* write the register state for the compute dispatch */
1706 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1707 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1708 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1709 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1710 }
1711 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1712 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1713 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1714 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1715 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1716 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1717
1718 /* write dispatch packet */
1719 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1720 ib.ptr[ib.length_dw++] = 8; /* x */
1721 ib.ptr[ib.length_dw++] = 1; /* y */
1722 ib.ptr[ib.length_dw++] = 1; /* z */
1723 ib.ptr[ib.length_dw++] =
1724 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1725
1726 /* write CS partial flush packet */
1727 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1728 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1729
1730 /* SGPR1 */
1731 /* write the register state for the compute dispatch */
1732 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1733 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1734 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1735 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1736 }
1737 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1738 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1739 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1740 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1741 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1742 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1743
1744 /* write dispatch packet */
1745 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1746 ib.ptr[ib.length_dw++] = 8; /* x */
1747 ib.ptr[ib.length_dw++] = 1; /* y */
1748 ib.ptr[ib.length_dw++] = 1; /* z */
1749 ib.ptr[ib.length_dw++] =
1750 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1751
1752 /* write CS partial flush packet */
1753 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1754 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1755
1756 /* SGPR2 */
1757 /* write the register state for the compute dispatch */
1758 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1759 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1760 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1761 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1762 }
1763 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1764 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1765 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1766 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1767 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1768 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1769
1770 /* write dispatch packet */
1771 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1772 ib.ptr[ib.length_dw++] = 8; /* x */
1773 ib.ptr[ib.length_dw++] = 1; /* y */
1774 ib.ptr[ib.length_dw++] = 1; /* z */
1775 ib.ptr[ib.length_dw++] =
1776 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1777
1778 /* write CS partial flush packet */
1779 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1780 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1781
1782 /* schedule the ib on the ring */
1783 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1784 if (r) {
1785 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1786 goto fail;
1787 }
1788
1789 /* wait for the GPU to finish processing the IB */
1790 r = dma_fence_wait(f, false);
1791 if (r) {
1792 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1793 goto fail;
1794 }
1795
1796 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1797 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1798 WREG32(mmGB_EDC_MODE, tmp);
1799
1800 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1801 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1802 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1803
1804
1805 /* read back registers to clear the counters */
1806 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1807 RREG32(sec_ded_counter_registers[i]);
1808
1809fail:
1810 amdgpu_ib_free(adev, &ib, NULL);
1811 dma_fence_put(f);
1812
1813 return r;
1814}
1815
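/*
 * Derive the per-ASIC gfx configuration (shader engines, tile pipes,
 * CUs, texture channel caches, fifo sizes) and the matching
 * GB_ADDR_CONFIG golden value.  Polaris parts query the topology from
 * the atombios gfx info table instead of using hardcoded values.
 */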
1816static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1817{
1818 u32 gb_addr_config;
1819 u32 mc_shared_chmap, mc_arb_ramcfg;
1820 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1821 u32 tmp;
1822 int ret;
1823
1824 switch (adev->asic_type) {
1825 case CHIP_TOPAZ:
1826 adev->gfx.config.max_shader_engines = 1;
1827 adev->gfx.config.max_tile_pipes = 2;
1828 adev->gfx.config.max_cu_per_sh = 6;
1829 adev->gfx.config.max_sh_per_se = 1;
1830 adev->gfx.config.max_backends_per_se = 2;
1831 adev->gfx.config.max_texture_channel_caches = 2;
1832 adev->gfx.config.max_gprs = 256;
1833 adev->gfx.config.max_gs_threads = 32;
1834 adev->gfx.config.max_hw_contexts = 8;
1835
1836 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1837 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1838 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1839 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1840 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1841 break;
1842 case CHIP_FIJI:
1843 adev->gfx.config.max_shader_engines = 4;
1844 adev->gfx.config.max_tile_pipes = 16;
1845 adev->gfx.config.max_cu_per_sh = 16;
1846 adev->gfx.config.max_sh_per_se = 1;
1847 adev->gfx.config.max_backends_per_se = 4;
1848 adev->gfx.config.max_texture_channel_caches = 16;
1849 adev->gfx.config.max_gprs = 256;
1850 adev->gfx.config.max_gs_threads = 32;
1851 adev->gfx.config.max_hw_contexts = 8;
1852
1853 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1854 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1855 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1856 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1857 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1858 break;
1859 case CHIP_POLARIS11:
1860 case CHIP_POLARIS12:
1861 ret = amdgpu_atombios_get_gfx_info(adev);
1862 if (ret)
1863 return ret;
1864 adev->gfx.config.max_gprs = 256;
1865 adev->gfx.config.max_gs_threads = 32;
1866 adev->gfx.config.max_hw_contexts = 8;
1867
1868 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1869 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1870 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1871 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1872 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1873 break;
1874 case CHIP_POLARIS10:
1875 ret = amdgpu_atombios_get_gfx_info(adev);
1876 if (ret)
1877 return ret;
1878 adev->gfx.config.max_gprs = 256;
1879 adev->gfx.config.max_gs_threads = 32;
1880 adev->gfx.config.max_hw_contexts = 8;
1881
1882 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1883 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1884 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1885 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1886 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1887 break;
1888 case CHIP_TONGA:
1889 adev->gfx.config.max_shader_engines = 4;
1890 adev->gfx.config.max_tile_pipes = 8;
1891 adev->gfx.config.max_cu_per_sh = 8;
1892 adev->gfx.config.max_sh_per_se = 1;
1893 adev->gfx.config.max_backends_per_se = 2;
1894 adev->gfx.config.max_texture_channel_caches = 8;
1895 adev->gfx.config.max_gprs = 256;
1896 adev->gfx.config.max_gs_threads = 32;
1897 adev->gfx.config.max_hw_contexts = 8;
1898
1899 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1900 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1901 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1902 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1903 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1904 break;
1905 case CHIP_CARRIZO:
1906 adev->gfx.config.max_shader_engines = 1;
1907 adev->gfx.config.max_tile_pipes = 2;
1908 adev->gfx.config.max_sh_per_se = 1;
1909 adev->gfx.config.max_backends_per_se = 2;
1910
1911 switch (adev->pdev->revision) {
1912 case 0xc4:
1913 case 0x84:
1914 case 0xc8:
1915 case 0xcc:
1916 case 0xe1:
1917 case 0xe3:
1918 /* B10 */
1919 adev->gfx.config.max_cu_per_sh = 8;
1920 break;
1921 case 0xc5:
1922 case 0x81:
1923 case 0x85:
1924 case 0xc9:
1925 case 0xcd:
1926 case 0xe2:
1927 case 0xe4:
1928 /* B8 */
1929 adev->gfx.config.max_cu_per_sh = 6;
1930 break;
1931 case 0xc6:
1932 case 0xca:
1933 case 0xce:
1934 case 0x88:
1935 case 0xe6:
1936 /* B6 */
1937 adev->gfx.config.max_cu_per_sh = 6;
1938 break;
1939 case 0xc7:
1940 case 0x87:
1941 case 0xcb:
1942 case 0xe5:
1943 case 0x89:
1944 default:
1945 /* B4 */
1946 adev->gfx.config.max_cu_per_sh = 4;
1947 break;
1948 }
1949
1950 adev->gfx.config.max_texture_channel_caches = 2;
1951 adev->gfx.config.max_gprs = 256;
1952 adev->gfx.config.max_gs_threads = 32;
1953 adev->gfx.config.max_hw_contexts = 8;
1954
1955 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1956 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1957 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1958 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1959 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1960 break;
1961 case CHIP_STONEY:
1962 adev->gfx.config.max_shader_engines = 1;
1963 adev->gfx.config.max_tile_pipes = 2;
1964 adev->gfx.config.max_sh_per_se = 1;
1965 adev->gfx.config.max_backends_per_se = 1;
1966
1967 switch (adev->pdev->revision) {
1968 case 0x80:
1969 case 0x81:
1970 case 0xc0:
1971 case 0xc1:
1972 case 0xc2:
1973 case 0xc4:
1974 case 0xc8:
1975 case 0xc9:
1976 case 0xd6:
1977 case 0xda:
1978 case 0xe9:
1979 case 0xea:
1980 adev->gfx.config.max_cu_per_sh = 3;
1981 break;
1982 case 0x83:
1983 case 0xd0:
1984 case 0xd1:
1985 case 0xd2:
1986 case 0xd4:
1987 case 0xdb:
1988 case 0xe1:
1989 case 0xe2:
1990 default:
1991 adev->gfx.config.max_cu_per_sh = 2;
1992 break;
1993 }
1994
1995 adev->gfx.config.max_texture_channel_caches = 2;
1996 adev->gfx.config.max_gprs = 256;
1997 adev->gfx.config.max_gs_threads = 16;
1998 adev->gfx.config.max_hw_contexts = 8;
1999
2000 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2001 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2002 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2003 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2004 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
2005 break;
2006 default:
2007 adev->gfx.config.max_shader_engines = 2;
2008 adev->gfx.config.max_tile_pipes = 4;
2009 adev->gfx.config.max_cu_per_sh = 2;
2010 adev->gfx.config.max_sh_per_se = 1;
2011 adev->gfx.config.max_backends_per_se = 2;
2012 adev->gfx.config.max_texture_channel_caches = 4;
2013 adev->gfx.config.max_gprs = 256;
2014 adev->gfx.config.max_gs_threads = 32;
2015 adev->gfx.config.max_hw_contexts = 8;
2016
2017 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2018 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2019 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2020 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2021 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
2022 break;
2023 }
2024
2025 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2026 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2027 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2028
2029 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2030 adev->gfx.config.mem_max_burst_length_bytes = 256;
2031 if (adev->flags & AMD_IS_APU) {
2032 /* Get memory bank mapping mode. */
2033 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2034 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2035 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2036
2037 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2038 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2039 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2040
2041 /* Validate settings in case only one DIMM is installed. */
2042 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2043 dimm00_addr_map = 0;
2044 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2045 dimm01_addr_map = 0;
2046 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2047 dimm10_addr_map = 0;
2048 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2049 dimm11_addr_map = 0;
2050
2051 /* If the DIMM addr map is 8GB, the row size should be 2KB, otherwise 1KB. */
2052 /* If ROW size(DIMM1) != ROW size(DIMM0), use the larger of the two. */
2053 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2054 adev->gfx.config.mem_row_size_in_kb = 2;
2055 else
2056 adev->gfx.config.mem_row_size_in_kb = 1;
2057 } else {
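/* dGPU: row size in KB = 4 bytes/column * 2^(8 + NOOFCOLS) / 1024, capped at 4KB */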
2058 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2059 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2060 if (adev->gfx.config.mem_row_size_in_kb > 4)
2061 adev->gfx.config.mem_row_size_in_kb = 4;
2062 }
2063
2064 adev->gfx.config.shader_engine_tile_size = 32;
2065 adev->gfx.config.num_gpus = 1;
2066 adev->gfx.config.multi_gpu_tile_size = 64;
2067
2068 /* fix up row size */
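/* GB_ADDR_CONFIG.ROW_SIZE encodes log2(row size in KB): 1KB->0, 2KB->1, 4KB->2 */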
2069 switch (adev->gfx.config.mem_row_size_in_kb) {
2070 case 1:
2071 default:
2072 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2073 break;
2074 case 2:
2075 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2076 break;
2077 case 4:
2078 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2079 break;
2080 }
2081 adev->gfx.config.gb_addr_config = gb_addr_config;
2082
2083 return 0;
2084}
2085
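/*
 * sw_init: hook up the KIQ, EOP and privileged reg/instruction
 * interrupt sources, load the microcode, create the RLC/MEC/KIQ BOs,
 * then initialize the gfx ring, the compute rings, the KIQ ring and
 * the per-queue MQDs before reserving the GDS/GWS/OA partitions.
 */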
2086static int gfx_v8_0_sw_init(void *handle)
2087{
2088 int i, r;
2089 struct amdgpu_ring *ring;
2090 struct amdgpu_kiq *kiq;
2091 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2092
2093 /* KIQ event */
2094 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
2095 if (r)
2096 return r;
2097
2098 /* EOP Event */
2099 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
2100 if (r)
2101 return r;
2102
2103 /* Privileged reg */
2104 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
2105 &adev->gfx.priv_reg_irq);
2106 if (r)
2107 return r;
2108
2109 /* Privileged inst */
2110 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
2111 &adev->gfx.priv_inst_irq);
2112 if (r)
2113 return r;
2114
2115 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2116
2117 gfx_v8_0_scratch_init(adev);
2118
2119 r = gfx_v8_0_init_microcode(adev);
2120 if (r) {
2121 DRM_ERROR("Failed to load gfx firmware!\n");
2122 return r;
2123 }
2124
2125 r = gfx_v8_0_rlc_init(adev);
2126 if (r) {
2127 DRM_ERROR("Failed to init rlc BOs!\n");
2128 return r;
2129 }
2130
2131 r = gfx_v8_0_mec_init(adev);
2132 if (r) {
2133 DRM_ERROR("Failed to init MEC BOs!\n");
2134 return r;
2135 }
2136
2137 /* set up the gfx ring */
2138 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2139 ring = &adev->gfx.gfx_ring[i];
2140 ring->ring_obj = NULL;
2141 sprintf(ring->name, "gfx");
2142 /* no gfx doorbells on iceland */
2143 if (adev->asic_type != CHIP_TOPAZ) {
2144 ring->use_doorbell = true;
2145 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2146 }
2147
2148 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2149 AMDGPU_CP_IRQ_GFX_EOP);
2150 if (r)
2151 return r;
2152 }
2153
2154 /* set up the compute queues */
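	/* ring i maps to ME 1, pipe i/8, queue i%8, doorbell AMDGPU_DOORBELL_MEC_RING0 + i */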
2155 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2156 unsigned irq_type;
2157
2158 /* max 32 queues per MEC */
2159 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2160 DRM_ERROR("Too many (%d) compute rings!\n", i);
2161 break;
2162 }
2163 ring = &adev->gfx.compute_ring[i];
2164 ring->ring_obj = NULL;
2165 ring->use_doorbell = true;
2166 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2167 ring->me = 1; /* first MEC */
2168 ring->pipe = i / 8;
2169 ring->queue = i % 8;
2170 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
2171 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2172 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2173 /* type-2 packets are deprecated on MEC, use type-3 instead */
2174 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2175 irq_type);
2176 if (r)
2177 return r;
2178 }
2179
2180 r = gfx_v8_0_kiq_init(adev);
2181 if (r) {
2182 DRM_ERROR("Failed to init KIQ BOs!\n");
2183 return r;
2184 }
2185
2186 kiq = &adev->gfx.kiq;
2187 r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2188 if (r)
2189 return r;
2190
2191 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2192 r = gfx_v8_0_compute_mqd_sw_init(adev);
2193 if (r)
2194 return r;
2195
2196 /* reserve GDS, GWS and OA resource for gfx */
2197 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2198 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2199 &adev->gds.gds_gfx_bo, NULL, NULL);
2200 if (r)
2201 return r;
2202
2203 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2204 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2205 &adev->gds.gws_gfx_bo, NULL, NULL);
2206 if (r)
2207 return r;
2208
2209 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2210 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2211 &adev->gds.oa_gfx_bo, NULL, NULL);
2212 if (r)
2213 return r;
2214
2215 adev->gfx.ce_ram_size = 0x8000;
2216
2217 r = gfx_v8_0_gpu_early_init(adev);
2218 if (r)
2219 return r;
2220
2221 return 0;
2222}
2223
2224static int gfx_v8_0_sw_fini(void *handle)
2225{
2226 int i;
2227 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2228
2229 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2230 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2231 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2232
2233 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2234 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2235 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2236 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2237
2238 gfx_v8_0_compute_mqd_sw_fini(adev);
2239 gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2240 gfx_v8_0_kiq_fini(adev);
2241
2242 gfx_v8_0_mec_fini(adev);
2243 gfx_v8_0_rlc_fini(adev);
2244 gfx_v8_0_free_microcode(adev);
2245
2246 return 0;
2247}
2248
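/*
 * Program the GB_TILE_MODE0-31 and GB_MACROTILE_MODE0-15 tables with
 * per-ASIC array mode, pipe config, tile split and bank geometry.
 * Indices that are unused on a given part (e.g. 7, 12, 17 and 23 on
 * Topaz) are left at zero and not written.
 */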
2249static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2250{
2251 uint32_t *modearray, *mod2array;
2252 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2253 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2254 u32 reg_offset;
2255
2256 modearray = adev->gfx.config.tile_mode_array;
2257 mod2array = adev->gfx.config.macrotile_mode_array;
2258
2259 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2260 modearray[reg_offset] = 0;
2261
2262 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2263 mod2array[reg_offset] = 0;
2264
2265 switch (adev->asic_type) {
2266 case CHIP_TOPAZ:
2267 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2268 PIPE_CONFIG(ADDR_SURF_P2) |
2269 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2270 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2271 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2272 PIPE_CONFIG(ADDR_SURF_P2) |
2273 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2274 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2275 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2276 PIPE_CONFIG(ADDR_SURF_P2) |
2277 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2278 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2279 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280 PIPE_CONFIG(ADDR_SURF_P2) |
2281 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2282 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2283 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2284 PIPE_CONFIG(ADDR_SURF_P2) |
2285 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2286 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2287 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2288 PIPE_CONFIG(ADDR_SURF_P2) |
2289 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2290 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2291 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2292 PIPE_CONFIG(ADDR_SURF_P2) |
2293 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2294 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2295 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2296 PIPE_CONFIG(ADDR_SURF_P2));
2297 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2298 PIPE_CONFIG(ADDR_SURF_P2) |
2299 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2301 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2302 PIPE_CONFIG(ADDR_SURF_P2) |
2303 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2304 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2305 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2306 PIPE_CONFIG(ADDR_SURF_P2) |
2307 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2308 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2309 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2310 PIPE_CONFIG(ADDR_SURF_P2) |
2311 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2312 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2313 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2314 PIPE_CONFIG(ADDR_SURF_P2) |
2315 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2317 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2318 PIPE_CONFIG(ADDR_SURF_P2) |
2319 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2322 PIPE_CONFIG(ADDR_SURF_P2) |
2323 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2325 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2326 PIPE_CONFIG(ADDR_SURF_P2) |
2327 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2329 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2330 PIPE_CONFIG(ADDR_SURF_P2) |
2331 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2332 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2333 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2334 PIPE_CONFIG(ADDR_SURF_P2) |
2335 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2336 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2337 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2338 PIPE_CONFIG(ADDR_SURF_P2) |
2339 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2341 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2342 PIPE_CONFIG(ADDR_SURF_P2) |
2343 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2345 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2346 PIPE_CONFIG(ADDR_SURF_P2) |
2347 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2348 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2349 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2350 PIPE_CONFIG(ADDR_SURF_P2) |
2351 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2352 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2353 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2354 PIPE_CONFIG(ADDR_SURF_P2) |
2355 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2357 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2358 PIPE_CONFIG(ADDR_SURF_P2) |
2359 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2361 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362 PIPE_CONFIG(ADDR_SURF_P2) |
2363 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2364 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2365 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2366 PIPE_CONFIG(ADDR_SURF_P2) |
2367 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2369
2370 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2371 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2372 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2373 NUM_BANKS(ADDR_SURF_8_BANK));
2374 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2375 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2376 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2377 NUM_BANKS(ADDR_SURF_8_BANK));
2378 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2379 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2380 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2381 NUM_BANKS(ADDR_SURF_8_BANK));
2382 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2383 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2384 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2385 NUM_BANKS(ADDR_SURF_8_BANK));
2386 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2387 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2388 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2389 NUM_BANKS(ADDR_SURF_8_BANK));
2390 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2391 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2392 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2393 NUM_BANKS(ADDR_SURF_8_BANK));
2394 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2395 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2396 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2397 NUM_BANKS(ADDR_SURF_8_BANK));
2398 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2399 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2400 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2401 NUM_BANKS(ADDR_SURF_16_BANK));
2402 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2403 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2404 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2405 NUM_BANKS(ADDR_SURF_16_BANK));
2406 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2407 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2408 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2409 NUM_BANKS(ADDR_SURF_16_BANK));
2410 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2413 NUM_BANKS(ADDR_SURF_16_BANK));
2414 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2417 NUM_BANKS(ADDR_SURF_16_BANK));
2418 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2419 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2420 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2421 NUM_BANKS(ADDR_SURF_16_BANK));
2422 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2423 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2424 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2425 NUM_BANKS(ADDR_SURF_8_BANK));
2426
2427 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2428 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2429 reg_offset != 23)
2430 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2431
2432 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2433 if (reg_offset != 7)
2434 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2435
2436 break;
2437 case CHIP_FIJI:
2438 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2440 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2441 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2442 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2443 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2444 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2445 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2446 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2449 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2450 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2452 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2453 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2454 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2455 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2457 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2458 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2459 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2460 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2461 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2462 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2463 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2464 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2465 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2466 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2467 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2468 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2469 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2471 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2472 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2473 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2474 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2476 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2477 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2478 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2481 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2482 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2484 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2485 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2486 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2488 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2489 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2490 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2494 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2496 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2497 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2498 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2499 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2500 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2501 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2502 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2503 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2504 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2505 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2506 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2507 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2508 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2509 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2510 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2511 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2512 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2513 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2514 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2515 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2516 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2517 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2518 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2519 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2520 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2521 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2522 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2523 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2524 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2525 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2526 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2527 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2528 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2529 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2530 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2531 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2532 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2533 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2534 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2535 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2536 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2537 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2538 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2539 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2540 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2541 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2542 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2543 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2544 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2545 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2546 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2550 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2553 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2554 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2555 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2556 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2557 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2558 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2559 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2560
2561 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2562 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2563 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2564 NUM_BANKS(ADDR_SURF_8_BANK));
2565 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2567 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2568 NUM_BANKS(ADDR_SURF_8_BANK));
2569 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2571 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2572 NUM_BANKS(ADDR_SURF_8_BANK));
2573 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2575 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2576 NUM_BANKS(ADDR_SURF_8_BANK));
2577 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2579 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2580 NUM_BANKS(ADDR_SURF_8_BANK));
2581 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2583 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2584 NUM_BANKS(ADDR_SURF_8_BANK));
2585 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2587 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2588 NUM_BANKS(ADDR_SURF_8_BANK));
2589 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2591 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2592 NUM_BANKS(ADDR_SURF_8_BANK));
2593 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2595 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2596 NUM_BANKS(ADDR_SURF_8_BANK));
2597 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2599 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2600 NUM_BANKS(ADDR_SURF_8_BANK));
2601 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2603 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2604 NUM_BANKS(ADDR_SURF_8_BANK));
2605 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2607 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2608 NUM_BANKS(ADDR_SURF_8_BANK));
2609 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2611 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2612 NUM_BANKS(ADDR_SURF_8_BANK));
2613 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2614 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2615 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2616 NUM_BANKS(ADDR_SURF_4_BANK));
2617
2618 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2619 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2620
2621 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2622 if (reg_offset != 7)
2623 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2624
2625 break;
2626 case CHIP_TONGA:
2627 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2628 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2630 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2631 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2632 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2633 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2634 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2635 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2636 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2637 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2638 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2639 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2640 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2642 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2643 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2645 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2646 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2647 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2648 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2649 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2650 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2651 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2652 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2653 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2654 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2655 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2656 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2657 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2658 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2659 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2660 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2661 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2662 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2663 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2664 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2665 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2667 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2668 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2669 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2670 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2672 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2673 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2674 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2676 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2677 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2680 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2681 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2683 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2684 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2685 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2686 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2687 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2688 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2689 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2690 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2692 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2693 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2694 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2696 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2697 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2698 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2699 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2700 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2701 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2702 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2703 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2704 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2705 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2706 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2707 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2708 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2709 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2710 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2711 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2712 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2713 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2714 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2715 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2716 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2717 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2718 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2720 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2721 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2722 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2723 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2724 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2725 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2726 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2727 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2728 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2729 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2730 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2731 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2732 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2733 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2734 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2735 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2736 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2737 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2739 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2740 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2741 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2742 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2743 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2744 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2745 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2746 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2747 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2748 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2749
2750 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2752 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2753 NUM_BANKS(ADDR_SURF_16_BANK));
2754 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2755 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2756 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2757 NUM_BANKS(ADDR_SURF_16_BANK));
2758 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2759 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2760 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2761 NUM_BANKS(ADDR_SURF_16_BANK));
2762 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2764 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2765 NUM_BANKS(ADDR_SURF_16_BANK));
2766 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2768 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2769 NUM_BANKS(ADDR_SURF_16_BANK));
2770 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2772 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2773 NUM_BANKS(ADDR_SURF_16_BANK));
2774 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2776 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2777 NUM_BANKS(ADDR_SURF_16_BANK));
2778 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2779 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2780 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2781 NUM_BANKS(ADDR_SURF_16_BANK));
2782 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2783 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2784 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2785 NUM_BANKS(ADDR_SURF_16_BANK));
2786 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2788 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2789 NUM_BANKS(ADDR_SURF_16_BANK));
2790 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2792 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2793 NUM_BANKS(ADDR_SURF_16_BANK));
2794 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2795 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2796 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2797 NUM_BANKS(ADDR_SURF_8_BANK));
2798 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2800 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2801 NUM_BANKS(ADDR_SURF_4_BANK));
2802 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2803 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2804 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2805 NUM_BANKS(ADDR_SURF_4_BANK));
2806
2807 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2808 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2809
2810 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2811 if (reg_offset != 7)
2812 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2813
2814 break;
2815 case CHIP_POLARIS11:
2816 case CHIP_POLARIS12:
2817 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2818 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2819 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2820 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2821 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2824 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2825 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2826 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2827 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2828 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2829 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2830 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2831 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2832 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2833 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2835 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2836 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2837 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2838 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2840 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2841 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2842 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2843 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2844 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2845 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2846 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2847 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2848 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2849 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2850 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2851 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2852 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2854 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2856 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2859 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2860 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2863 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2864 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2866 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2867 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2868 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2869 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2870 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2871 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2873 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2875 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2876 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2877 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2878 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2879 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2880 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2881 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2882 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2883 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2884 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2885 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2886 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2887 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2888 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2889 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2890 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2891 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2892 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2893 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2894 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2895 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2896 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2898 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2899 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2900 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2902 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2903 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2904 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2905 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2906 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2907 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2908 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2909 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2910 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2911 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2912 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2914 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2915 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2916 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2917 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2918 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2919 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2920 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2921 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2922 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2923 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2924 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2925 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2926 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2928 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2929 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2930 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2931 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2932 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2933 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2934 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2935 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2936 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2937 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2938 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2939
2940 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2941 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2942 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2943 NUM_BANKS(ADDR_SURF_16_BANK));
2944
2945 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2946 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2947 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2948 NUM_BANKS(ADDR_SURF_16_BANK));
2949
2950 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2951 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2952 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2953 NUM_BANKS(ADDR_SURF_16_BANK));
2954
2955 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2957 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2958 NUM_BANKS(ADDR_SURF_16_BANK));
2959
2960 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2961 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2962 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2963 NUM_BANKS(ADDR_SURF_16_BANK));
2964
2965 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2966 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2967 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2968 NUM_BANKS(ADDR_SURF_16_BANK));
2969
2970 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2971 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2972 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2973 NUM_BANKS(ADDR_SURF_16_BANK));
2974
2975 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2976 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2977 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978 NUM_BANKS(ADDR_SURF_16_BANK));
2979
2980 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2981 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2982 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983 NUM_BANKS(ADDR_SURF_16_BANK));
2984
2985 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2986 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2987 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2988 NUM_BANKS(ADDR_SURF_16_BANK));
2989
2990 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2991 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2992 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2993 NUM_BANKS(ADDR_SURF_16_BANK));
2994
2995 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2998 NUM_BANKS(ADDR_SURF_16_BANK));
2999
3000 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3002 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3003 NUM_BANKS(ADDR_SURF_8_BANK));
3004
3005 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3006 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3007 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3008 NUM_BANKS(ADDR_SURF_4_BANK));
3009
3010 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3011 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3012
3013 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3014 if (reg_offset != 7)
3015 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3016
3017 break;
2cc0c0b5 3018 case CHIP_POLARIS10:
68182d90
FC
3019 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3020 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3021 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3022 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3023 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3024 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3025 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3026 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3027 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3028 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3029 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3030 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3031 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3033 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3034 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3035 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3036 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3037 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3038 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3039 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3040 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3041 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3042 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3043 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3044 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3045 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3046 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3047 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3048 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3049 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3050 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3051 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3052 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3053 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3054 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3055 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3057 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3058 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3059 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3060 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3061 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3062 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3063 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3064 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3065 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3066 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3067 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3068 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3069 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3070 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3071 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3073 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3074 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3075 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3077 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3078 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3079 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3080 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3081 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3082 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3083 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3085 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3086 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3087 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3088 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3089 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3090 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3091 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3093 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3094 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3095 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3096 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3097 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3098 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3099 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3100 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3101 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3102 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3103 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3105 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3106 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3107 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3108 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3109 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3110 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3111 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3112 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3113 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3114 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3115 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3116 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3117 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3118 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3119 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3120 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3121 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3122 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3123 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3124 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3125 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3126 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3127 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3128 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3129 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3130 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3131 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3132 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3133 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3134 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3135 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3136 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3137 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3138 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3139 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3141
3142 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3143 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3144 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3145 NUM_BANKS(ADDR_SURF_16_BANK));
3146
3147 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3149 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3150 NUM_BANKS(ADDR_SURF_16_BANK));
3151
3152 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3153 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3154 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3155 NUM_BANKS(ADDR_SURF_16_BANK));
3156
3157 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3158 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3159 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3160 NUM_BANKS(ADDR_SURF_16_BANK));
3161
3162 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3163 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3164 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3165 NUM_BANKS(ADDR_SURF_16_BANK));
3166
3167 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3170 NUM_BANKS(ADDR_SURF_16_BANK));
3171
3172 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3173 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3174 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3175 NUM_BANKS(ADDR_SURF_16_BANK));
3176
3177 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3178 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3179 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3180 NUM_BANKS(ADDR_SURF_16_BANK));
3181
3182 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3183 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3184 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3185 NUM_BANKS(ADDR_SURF_16_BANK));
3186
3187 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3188 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3189 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3190 NUM_BANKS(ADDR_SURF_16_BANK));
3191
3192 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3194 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3195 NUM_BANKS(ADDR_SURF_16_BANK));
3196
3197 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3198 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3199 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3200 NUM_BANKS(ADDR_SURF_8_BANK));
3201
3202 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3203 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3204 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3205 NUM_BANKS(ADDR_SURF_4_BANK));
3206
3207 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3208 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3209 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3210 NUM_BANKS(ADDR_SURF_4_BANK));
3211
3212 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3213 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3214
3215 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3216 if (reg_offset != 7)
3217 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3218
aaa36a97 3219 break;
e3c7656c 3220 case CHIP_STONEY:
90bea0ab
TSD
3221 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3222 PIPE_CONFIG(ADDR_SURF_P2) |
3223 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3224 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3225 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3226 PIPE_CONFIG(ADDR_SURF_P2) |
3227 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3228 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3229 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3230 PIPE_CONFIG(ADDR_SURF_P2) |
3231 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3232 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3233 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3234 PIPE_CONFIG(ADDR_SURF_P2) |
3235 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3236 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3237 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3238 PIPE_CONFIG(ADDR_SURF_P2) |
3239 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3240 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3241 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3242 PIPE_CONFIG(ADDR_SURF_P2) |
3243 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3244 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3245 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3246 PIPE_CONFIG(ADDR_SURF_P2) |
3247 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3248 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3249 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3250 PIPE_CONFIG(ADDR_SURF_P2));
3251 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3252 PIPE_CONFIG(ADDR_SURF_P2) |
3253 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3254 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3255 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3256 PIPE_CONFIG(ADDR_SURF_P2) |
3257 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3259 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3260 PIPE_CONFIG(ADDR_SURF_P2) |
3261 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3263 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3264 PIPE_CONFIG(ADDR_SURF_P2) |
3265 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3267 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3268 PIPE_CONFIG(ADDR_SURF_P2) |
3269 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3271 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3272 PIPE_CONFIG(ADDR_SURF_P2) |
3273 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3274 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3275 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3276 PIPE_CONFIG(ADDR_SURF_P2) |
3277 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3278 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3279 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3280 PIPE_CONFIG(ADDR_SURF_P2) |
3281 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3283 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3284 PIPE_CONFIG(ADDR_SURF_P2) |
3285 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3287 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3288 PIPE_CONFIG(ADDR_SURF_P2) |
3289 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3291 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3292 PIPE_CONFIG(ADDR_SURF_P2) |
3293 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3295 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3296 PIPE_CONFIG(ADDR_SURF_P2) |
3297 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3299 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3300 PIPE_CONFIG(ADDR_SURF_P2) |
3301 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3303 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3304 PIPE_CONFIG(ADDR_SURF_P2) |
3305 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3307 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3308 PIPE_CONFIG(ADDR_SURF_P2) |
3309 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3311 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3312 PIPE_CONFIG(ADDR_SURF_P2) |
3313 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3315 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3316 PIPE_CONFIG(ADDR_SURF_P2) |
3317 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3318 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3319 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3320 PIPE_CONFIG(ADDR_SURF_P2) |
3321 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3323
3324 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3326 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3327 NUM_BANKS(ADDR_SURF_8_BANK));
3328 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3330 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3331 NUM_BANKS(ADDR_SURF_8_BANK));
3332 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3333 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3334 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3335 NUM_BANKS(ADDR_SURF_8_BANK));
3336 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3337 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3338 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3339 NUM_BANKS(ADDR_SURF_8_BANK));
3340 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343 NUM_BANKS(ADDR_SURF_8_BANK));
3344 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3345 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3346 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3347 NUM_BANKS(ADDR_SURF_8_BANK));
3348 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3349 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3350 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3351 NUM_BANKS(ADDR_SURF_8_BANK));
3352 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3353 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3354 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355 NUM_BANKS(ADDR_SURF_16_BANK));
3356 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3357 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3358 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3359 NUM_BANKS(ADDR_SURF_16_BANK));
3360 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3361 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3362 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3363 NUM_BANKS(ADDR_SURF_16_BANK));
3364 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3365 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3366 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367 NUM_BANKS(ADDR_SURF_16_BANK));
3368 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3370 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3371 NUM_BANKS(ADDR_SURF_16_BANK));
3372 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3373 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3374 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3375 NUM_BANKS(ADDR_SURF_16_BANK));
3376 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3379 NUM_BANKS(ADDR_SURF_8_BANK));
3380
3381 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3382 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3383 reg_offset != 23)
3384 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3385
3386 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3387 if (reg_offset != 7)
3388 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3389
e3c7656c 3390 break;
aaa36a97 3391 default:
90bea0ab
TSD
3392 dev_warn(adev->dev,
3393 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3394 adev->asic_type);
3395
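		/* fall through */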
3396 case CHIP_CARRIZO:
3397 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3398 PIPE_CONFIG(ADDR_SURF_P2) |
3399 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3400 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3401 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3402 PIPE_CONFIG(ADDR_SURF_P2) |
3403 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3404 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3405 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3406 PIPE_CONFIG(ADDR_SURF_P2) |
3407 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3408 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3409 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3410 PIPE_CONFIG(ADDR_SURF_P2) |
3411 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3412 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3413 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3414 PIPE_CONFIG(ADDR_SURF_P2) |
3415 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3416 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3417 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3418 PIPE_CONFIG(ADDR_SURF_P2) |
3419 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3420 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3421 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3422 PIPE_CONFIG(ADDR_SURF_P2) |
3423 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3424 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3425 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3426 PIPE_CONFIG(ADDR_SURF_P2));
3427 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3428 PIPE_CONFIG(ADDR_SURF_P2) |
3429 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3431 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3432 PIPE_CONFIG(ADDR_SURF_P2) |
3433 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3435 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3436 PIPE_CONFIG(ADDR_SURF_P2) |
3437 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3439 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3440 PIPE_CONFIG(ADDR_SURF_P2) |
3441 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3443 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3444 PIPE_CONFIG(ADDR_SURF_P2) |
3445 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3446 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3447 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3448 PIPE_CONFIG(ADDR_SURF_P2) |
3449 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3450 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3451 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3452 PIPE_CONFIG(ADDR_SURF_P2) |
3453 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3454 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3455 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3456 PIPE_CONFIG(ADDR_SURF_P2) |
3457 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3458 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3459 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3460 PIPE_CONFIG(ADDR_SURF_P2) |
3461 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3462 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3463 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3464 PIPE_CONFIG(ADDR_SURF_P2) |
3465 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3467 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3468 PIPE_CONFIG(ADDR_SURF_P2) |
3469 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3470 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3471 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3472 PIPE_CONFIG(ADDR_SURF_P2) |
3473 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3474 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3475 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3476 PIPE_CONFIG(ADDR_SURF_P2) |
3477 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3478 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3479 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3480 PIPE_CONFIG(ADDR_SURF_P2) |
3481 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3483 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3484 PIPE_CONFIG(ADDR_SURF_P2) |
3485 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3487 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3488 PIPE_CONFIG(ADDR_SURF_P2) |
3489 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3491 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3492 PIPE_CONFIG(ADDR_SURF_P2) |
3493 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3494 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3495 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3496 PIPE_CONFIG(ADDR_SURF_P2) |
3497 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3498 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3499
3500 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3501 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3502 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3503 NUM_BANKS(ADDR_SURF_8_BANK));
3504 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3505 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3506 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3507 NUM_BANKS(ADDR_SURF_8_BANK));
3508 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3511 NUM_BANKS(ADDR_SURF_8_BANK));
3512 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3513 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3514 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3515 NUM_BANKS(ADDR_SURF_8_BANK));
3516 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3517 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3518 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3519 NUM_BANKS(ADDR_SURF_8_BANK));
3520 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3523 NUM_BANKS(ADDR_SURF_8_BANK));
3524 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3525 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3526 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3527 NUM_BANKS(ADDR_SURF_8_BANK));
3528 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3531 NUM_BANKS(ADDR_SURF_16_BANK));
3532 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3535 NUM_BANKS(ADDR_SURF_16_BANK));
3536 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3537 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3538 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3539 NUM_BANKS(ADDR_SURF_16_BANK));
3540 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3541 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3542 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3543 NUM_BANKS(ADDR_SURF_16_BANK));
3544 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3545 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3546 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3547 NUM_BANKS(ADDR_SURF_16_BANK));
3548 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3549 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3550 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3551 NUM_BANKS(ADDR_SURF_16_BANK));
3552 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3553 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3554 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3555 NUM_BANKS(ADDR_SURF_8_BANK));
3556
3557 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3558 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3559 reg_offset != 23)
3560 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3561
3562 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3563 if (reg_offset != 7)
3564 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3565
3566 break;
aaa36a97
AD
3567 }
3568}
3569
05fb7291 3570static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
9559ef5b 3571 u32 se_num, u32 sh_num, u32 instance)
aaa36a97 3572{
9559ef5b
TSD
3573 u32 data;
3574
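	/* an argument of 0xffffffff requests broadcast writes rather than
	 * selecting a single instance/SE/SH
	 */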
3575 if (instance == 0xffffffff)
3576 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3577 else
3578 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
aaa36a97 3579
5003f278 3580 if (se_num == 0xffffffff)
aaa36a97 3581 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
5003f278 3582 else
aaa36a97 3583 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
5003f278
TSD
3584
3585 if (sh_num == 0xffffffff)
3586 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3587 else
aaa36a97 3588 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
5003f278 3589
aaa36a97
AD
3590 WREG32(mmGRBM_GFX_INDEX, data);
3591}
3592
8f8e00c1
AD
3593static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3594{
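	/* e.g. a bit_width of 4 yields the mask 0xf */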
3595 return (u32)((1ULL << bit_width) - 1);
3596}
3597
3598static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
aaa36a97
AD
3599{
3600 u32 data, mask;
3601
5003f278
TSD
3602 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3603 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
aaa36a97 3604
5003f278 3605 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
aaa36a97 3606
8f8e00c1
AD
3607 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3608 adev->gfx.config.max_sh_per_se);
aaa36a97 3609
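	/* the register bits mark disabled backends; invert and mask to get
	 * the active ones
	 */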
8f8e00c1 3610 return (~data) & mask;
aaa36a97
AD
3611}
3612
167ac573
HR
3613static void
3614gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3615{
3616 switch (adev->asic_type) {
3617 case CHIP_FIJI:
3618 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3619 RB_XSEL2(1) | PKR_MAP(2) |
3620 PKR_XSEL(1) | PKR_YSEL(1) |
3621 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3622 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3623 SE_PAIR_YSEL(2);
3624 break;
3625 case CHIP_TONGA:
3626 case CHIP_POLARIS10:
3627 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3628 SE_XSEL(1) | SE_YSEL(1);
3629 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3630 SE_PAIR_YSEL(2);
3631 break;
3632 case CHIP_TOPAZ:
3633 case CHIP_CARRIZO:
3634 *rconf |= RB_MAP_PKR0(2);
3635 *rconf1 |= 0x0;
3636 break;
3637 case CHIP_POLARIS11:
c4642a47 3638 case CHIP_POLARIS12:
167ac573
HR
3639 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3640 SE_XSEL(1) | SE_YSEL(1);
3641 *rconf1 |= 0x0;
3642 break;
3643 case CHIP_STONEY:
3644 *rconf |= 0x0;
3645 *rconf1 |= 0x0;
3646 break;
3647 default:
3648 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3649 break;
3650 }
3651}
3652
3653static void
3654gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3655 u32 raster_config, u32 raster_config_1,
3656 unsigned rb_mask, unsigned num_rb)
3657{
3658 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3659 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3660 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3661 unsigned rb_per_se = num_rb / num_se;
3662 unsigned se_mask[4];
3663 unsigned se;
3664
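	/* slice rb_mask into per-shader-engine masks; se_mask[i] holds the
	 * RB bits belonging to shader engine i
	 */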
3665 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3666 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3667 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3668 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3669
3670 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3671 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3672 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3673
3674 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3675 (!se_mask[2] && !se_mask[3]))) {
3676 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3677
3678 if (!se_mask[0] && !se_mask[1]) {
3679 raster_config_1 |=
3680 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3681 } else {
3682 raster_config_1 |=
3683 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3684 }
3685 }
3686
3687 for (se = 0; se < num_se; se++) {
3688 unsigned raster_config_se = raster_config;
3689 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3690 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3691 int idx = (se / 2) * 2;
3692
3693 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3694 raster_config_se &= ~SE_MAP_MASK;
3695
3696 if (!se_mask[idx]) {
3697 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3698 } else {
3699 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3700 }
3701 }
3702
3703 pkr0_mask &= rb_mask;
3704 pkr1_mask &= rb_mask;
3705 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3706 raster_config_se &= ~PKR_MAP_MASK;
3707
3708 if (!pkr0_mask) {
3709 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3710 } else {
3711 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3712 }
3713 }
3714
3715 if (rb_per_se >= 2) {
3716 unsigned rb0_mask = 1 << (se * rb_per_se);
3717 unsigned rb1_mask = rb0_mask << 1;
3718
3719 rb0_mask &= rb_mask;
3720 rb1_mask &= rb_mask;
3721 if (!rb0_mask || !rb1_mask) {
3722 raster_config_se &= ~RB_MAP_PKR0_MASK;
3723
3724 if (!rb0_mask) {
3725 raster_config_se |=
3726 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3727 } else {
3728 raster_config_se |=
3729 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3730 }
3731 }
3732
3733 if (rb_per_se > 2) {
3734 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3735 rb1_mask = rb0_mask << 1;
3736 rb0_mask &= rb_mask;
3737 rb1_mask &= rb_mask;
3738 if (!rb0_mask || !rb1_mask) {
3739 raster_config_se &= ~RB_MAP_PKR1_MASK;
3740
3741 if (!rb0_mask) {
3742 raster_config_se |=
3743 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3744 } else {
3745 raster_config_se |=
3746 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3747 }
3748 }
3749 }
3750 }
3751
3752 /* GRBM_GFX_INDEX has a different offset on VI */
3753 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3754 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3755 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3756 }
3757
3758 /* GRBM_GFX_INDEX has a different offset on VI */
3759 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3760}
3761
8f8e00c1 3762static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
aaa36a97
AD
3763{
3764 int i, j;
aac1e3ca 3765 u32 data;
167ac573 3766 u32 raster_config = 0, raster_config_1 = 0;
8f8e00c1 3767 u32 active_rbs = 0;
6157bd7a
FC
3768 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3769 adev->gfx.config.max_sh_per_se;
167ac573 3770 unsigned num_rb_pipes;
aaa36a97
AD
3771
3772 mutex_lock(&adev->grbm_idx_mutex);
8f8e00c1
AD
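	/* read back the per-SE/SH RB bitmaps and pack them into one
	 * active_rbs mask
	 */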
3773 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3774 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
9559ef5b 3775 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
8f8e00c1
AD
3776 data = gfx_v8_0_get_rb_active_bitmap(adev);
3777 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
6157bd7a 3778 rb_bitmap_width_per_sh);
aaa36a97
AD
3779 }
3780 }
9559ef5b 3781 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
aaa36a97 3782
8f8e00c1 3783 adev->gfx.config.backend_enable_mask = active_rbs;
aac1e3ca 3784 adev->gfx.config.num_rbs = hweight32(active_rbs);
167ac573
HR
3785
3786 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3787 adev->gfx.config.max_shader_engines, 16);
3788
3789 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3790
3791 if (!adev->gfx.config.backend_enable_mask ||
3792 adev->gfx.config.num_rbs >= num_rb_pipes) {
3793 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3794 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3795 } else {
3796 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3797 adev->gfx.config.backend_enable_mask,
3798 num_rb_pipes);
3799 }
3800
392f0c77
AD
3801 /* cache the values for userspace */
3802 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3803 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3804 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3805 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3806 RREG32(mmCC_RB_BACKEND_DISABLE);
3807 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3808 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3809 adev->gfx.config.rb_config[i][j].raster_config =
3810 RREG32(mmPA_SC_RASTER_CONFIG);
3811 adev->gfx.config.rb_config[i][j].raster_config_1 =
3812 RREG32(mmPA_SC_RASTER_CONFIG_1);
3813 }
3814 }
3815 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
167ac573 3816 mutex_unlock(&adev->grbm_idx_mutex);
aaa36a97
AD
3817}
3818
cd06bf68 3819/**
35c7a952 3820 * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
cd06bf68
BG
3821 *
 3822 * @adev: amdgpu_device pointer
3823 *
3824 * Initialize compute vmid sh_mem registers
3825 *
3826 */
3827#define DEFAULT_SH_MEM_BASES (0x6000)
3828#define FIRST_COMPUTE_VMID (8)
3829#define LAST_COMPUTE_VMID (16)
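/* VMIDs 8..15 are reserved for compute; the lower VMIDs are left to graphics */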
35c7a952 3830static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
cd06bf68
BG
3831{
3832 int i;
3833 uint32_t sh_mem_config;
3834 uint32_t sh_mem_bases;
3835
3836 /*
3837 * Configure apertures:
3838 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3839 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3840 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3841 */
3842 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
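	/* each 16-bit base is an address >> 48, so 0x6000 << 48 ==
	 * 0x6000000000000000, matching the aperture layout described above
	 */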
3843
3844 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3845 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3846 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3847 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3848 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3849 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3850
3851 mutex_lock(&adev->srbm_mutex);
3852 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3853 vi_srbm_select(adev, 0, 0, 0, i);
3854 /* CP and shaders */
3855 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3856 WREG32(mmSH_MEM_APE1_BASE, 1);
3857 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3858 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3859 }
3860 vi_srbm_select(adev, 0, 0, 0, 0);
3861 mutex_unlock(&adev->srbm_mutex);
3862}
3863
df6e2c4a
JZ
3864static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3865{
3866 switch (adev->asic_type) {
3867 default:
3868 adev->gfx.config.double_offchip_lds_buf = 1;
3869 break;
3870 case CHIP_CARRIZO:
3871 case CHIP_STONEY:
3872 adev->gfx.config.double_offchip_lds_buf = 0;
3873 break;
3874 }
3875}
3876
aaa36a97
AD
3877static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3878{
8fe73328 3879 u32 tmp, sh_static_mem_cfg;
aaa36a97
AD
3880 int i;
3881
61cb8cef 3882 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
0bde3a95
AD
3883 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3884 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3885 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
aaa36a97
AD
3886
3887 gfx_v8_0_tiling_mode_table_init(adev);
8f8e00c1 3888 gfx_v8_0_setup_rb(adev);
7dae69a2 3889 gfx_v8_0_get_cu_info(adev);
df6e2c4a 3890 gfx_v8_0_config_init(adev);
aaa36a97
AD
3891
3892 /* XXX SH_MEM regs */
3893 /* where to put LDS, scratch, GPUVM in FSA64 space */
8fe73328
JZ
3894 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3895 SWIZZLE_ENABLE, 1);
3896 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3897 ELEMENT_SIZE, 1);
3898 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3899 INDEX_STRIDE, 3);
aaa36a97 3900 mutex_lock(&adev->srbm_mutex);
7645670d 3901 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
aaa36a97
AD
3902 vi_srbm_select(adev, 0, 0, 0, i);
3903 /* CP and shaders */
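		/* VMID 0 keeps SH_MEM_BASES at 0 with an uncached default
		 * MTYPE; the remaining VMIDs point SH_MEM_BASES at the
		 * shared aperture (as the two branches below show)
		 */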
3904 if (i == 0) {
3905 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3906 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
0bde3a95 3907 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
74a5d165 3908 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
aaa36a97 3909 WREG32(mmSH_MEM_CONFIG, tmp);
8fe73328 3910 WREG32(mmSH_MEM_BASES, 0);
aaa36a97
AD
3911 } else {
3912 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
8fe73328 3913 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
0bde3a95 3914 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
74a5d165 3915 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
aaa36a97 3916 WREG32(mmSH_MEM_CONFIG, tmp);
8fe73328
JZ
3917 tmp = adev->mc.shared_aperture_start >> 48;
3918 WREG32(mmSH_MEM_BASES, tmp);
aaa36a97
AD
3919 }
3920
3921 WREG32(mmSH_MEM_APE1_BASE, 1);
3922 WREG32(mmSH_MEM_APE1_LIMIT, 0);
8fe73328 3923 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
aaa36a97
AD
3924 }
3925 vi_srbm_select(adev, 0, 0, 0, 0);
3926 mutex_unlock(&adev->srbm_mutex);
3927
35c7a952 3928 gfx_v8_0_init_compute_vmid(adev);
cd06bf68 3929
aaa36a97
AD
3930 mutex_lock(&adev->grbm_idx_mutex);
3931 /*
 3932 * make sure that the following register writes are broadcast
 3933 * to all the shaders
3934 */
9559ef5b 3935 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
aaa36a97
AD
3936
3937 WREG32(mmPA_SC_FIFO_SIZE,
3938 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3939 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3940 (adev->gfx.config.sc_prim_fifo_size_backend <<
3941 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3942 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3943 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3944 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3945 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
d2383267 3946
3947 tmp = RREG32(mmSPI_ARB_PRIORITY);
3948 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3949 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3950 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3951 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3952 WREG32(mmSPI_ARB_PRIORITY, tmp);
3953
aaa36a97
AD
3954 mutex_unlock(&adev->grbm_idx_mutex);
3955
3956}
3957
3958static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3959{
3960 u32 i, j, k;
3961 u32 mask;
3962
3963 mutex_lock(&adev->grbm_idx_mutex);
3964 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3965 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
9559ef5b 3966 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
aaa36a97
AD
3967 for (k = 0; k < adev->usec_timeout; k++) {
3968 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3969 break;
3970 udelay(1);
3971 }
3972 }
3973 }
9559ef5b 3974 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
aaa36a97
AD
3975 mutex_unlock(&adev->grbm_idx_mutex);
3976
3977 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3978 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3979 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3980 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3981 for (k = 0; k < adev->usec_timeout; k++) {
3982 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3983 break;
3984 udelay(1);
3985 }
3986}
3987
3988static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3989 bool enable)
3990{
3991 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3992
0d07db7e
TSD
3993 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3994 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3995 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3996 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3997
aaa36a97
AD
3998 WREG32(mmCP_INT_CNTL_RING0, tmp);
3999}
4000
2b6cd977
EH
4001static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
4002{
 4003 /* clear state indirect buffer (csib) */
4004 WREG32(mmRLC_CSIB_ADDR_HI,
4005 adev->gfx.rlc.clear_state_gpu_addr >> 32);
4006 WREG32(mmRLC_CSIB_ADDR_LO,
4007 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
4008 WREG32(mmRLC_CSIB_LENGTH,
4009 adev->gfx.rlc.clear_state_size);
4010}
4011
4012static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
4013 int ind_offset,
4014 int list_size,
4015 int *unique_indices,
4016 int *indices_count,
4017 int max_indices,
4018 int *ind_start_offsets,
4019 int *offset_count,
4020 int max_offset)
4021{
4022 int indices;
4023 bool new_entry = true;
4024
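	/* the list appears to be groups of three dwords per register, with
	 * 0xFFFFFFFF terminating each entry; the third dword of a group is
	 * an index register that is uniquified below (inferred from this loop)
	 */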
4025 for (; ind_offset < list_size; ind_offset++) {
4026
4027 if (new_entry) {
4028 new_entry = false;
4029 ind_start_offsets[*offset_count] = ind_offset;
4030 *offset_count = *offset_count + 1;
4031 BUG_ON(*offset_count >= max_offset);
4032 }
4033
4034 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
4035 new_entry = true;
4036 continue;
4037 }
4038
4039 ind_offset += 2;
4040
 4041 /* look for the matching index */
4042 for (indices = 0;
4043 indices < *indices_count;
4044 indices++) {
4045 if (unique_indices[indices] ==
4046 register_list_format[ind_offset])
4047 break;
4048 }
4049
4050 if (indices >= *indices_count) {
4051 unique_indices[*indices_count] =
4052 register_list_format[ind_offset];
4053 indices = *indices_count;
4054 *indices_count = *indices_count + 1;
4055 BUG_ON(*indices_count >= max_indices);
4056 }
4057
4058 register_list_format[ind_offset] = indices;
4059 }
4060}
4061
4062static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4063{
4064 int i, temp, data;
4065 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4066 int indices_count = 0;
4067 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4068 int offset_count = 0;
4069
4070 int list_size;
4071 unsigned int *register_list_format =
4072 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3f12325a 4073 if (!register_list_format)
2b6cd977
EH
4074 return -ENOMEM;
4075 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4076 adev->gfx.rlc.reg_list_format_size_bytes);
4077
4078 gfx_v8_0_parse_ind_reg_list(register_list_format,
4079 RLC_FormatDirectRegListLength,
4080 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4081 unique_indices,
4082 &indices_count,
 4083 ARRAY_SIZE(unique_indices),
4084 indirect_start_offsets,
4085 &offset_count,
 4086 ARRAY_SIZE(indirect_start_offsets));
4087
4088 /* save and restore list */
61cb8cef 4089 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
2b6cd977
EH
4090
4091 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4092 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4093 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4094
4095 /* indirect list */
4096 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4097 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4098 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4099
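	/* reg_list_size_bytes >> 2 gives dwords; a further >> 1 gives the
	 * number of register/value pairs, which seems to be what the RLC
	 * expects here
	 */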
4100 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4101 list_size = list_size >> 1;
4102 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4103 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4104
 4105 /* starting offsets */
4106 WREG32(mmRLC_GPM_SCRATCH_ADDR,
4107 adev->gfx.rlc.starting_offsets_start);
 4108 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4109 WREG32(mmRLC_GPM_SCRATCH_DATA,
4110 indirect_start_offsets[i]);
4111
4112 /* unique indices */
4113 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4114 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
 4115 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
202e0b22 4116 if (unique_indices[i] != 0) {
b85c9d2a
ML
4117 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4118 WREG32(data + i, unique_indices[i] >> 20);
202e0b22 4119 }
2b6cd977
EH
4120 }
4121 kfree(register_list_format);
4122
4123 return 0;
4124}
4125
4126static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4127{
61cb8cef 4128 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
2b6cd977
EH
4129}
4130
fb16007b 4131static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
f4bfffdd
EH
4132{
4133 uint32_t data;
4134
c4d17b81
RZ
4135 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4136
4137 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4138 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4139 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4140 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4141 WREG32(mmRLC_PG_DELAY, data);
4142
4143 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4144 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4145
f4bfffdd
EH
4146}
4147
2c547165
AD
4148static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4149 bool enable)
4150{
61cb8cef 4151 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
2c547165
AD
4152}
4153
4154static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4155 bool enable)
4156{
61cb8cef 4157 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
2c547165
AD
4158}
4159
4160static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4161{
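	/* note the inverted sense: writing CP_PG_DISABLE = 0 enables CP power gating */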
eb584241 4162 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
2c547165
AD
4163}
4164
2b6cd977
EH
4165static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4166{
c4d17b81
RZ
4167 if ((adev->asic_type == CHIP_CARRIZO) ||
4168 (adev->asic_type == CHIP_STONEY)) {
2b6cd977
EH
4169 gfx_v8_0_init_csb(adev);
4170 gfx_v8_0_init_save_restore_list(adev);
4171 gfx_v8_0_enable_save_restore_machine(adev);
c4d17b81
RZ
4172 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4173 gfx_v8_0_init_power_gating(adev);
4174 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
c4642a47
JZ
4175 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4176 (adev->asic_type == CHIP_POLARIS12)) {
c4d17b81
RZ
4177 gfx_v8_0_init_csb(adev);
4178 gfx_v8_0_init_save_restore_list(adev);
4179 gfx_v8_0_enable_save_restore_machine(adev);
4180 gfx_v8_0_init_power_gating(adev);
2b6cd977 4181 }
c4d17b81 4182
2b6cd977
EH
4183}
4184
761c2e82 4185static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
aaa36a97 4186{
61cb8cef 4187 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
aaa36a97
AD
4188
4189 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
aaa36a97
AD
4190 gfx_v8_0_wait_for_rlc_serdes(adev);
4191}
4192
4193static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4194{
61cb8cef 4195 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
aaa36a97 4196 udelay(50);
61cb8cef
TSD
4197
4198 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4199 udelay(50);
4200}
4201
4202static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4203{
61cb8cef 4204 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4205
4206	/* on APUs such as carrizo, the CP interrupt is enabled only after the CP is initialized */
e3c7656c 4207 if (!(adev->flags & AMD_IS_APU))
4208 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4209
4210 udelay(50);
4211}
4212
4213static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4214{
4215 const struct rlc_firmware_header_v2_0 *hdr;
4216 const __le32 *fw_data;
4217 unsigned i, fw_size;
4218
4219 if (!adev->gfx.rlc_fw)
4220 return -EINVAL;
4221
4222 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4223 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4224
4225 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4226 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4227 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4228
4229 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4230 for (i = 0; i < fw_size; i++)
4231 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4232 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4233
4234 return 0;
4235}
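/*
 * Editorial sketch, not part of the original file: the RLC loader above and
 * the PFP/CE/ME/MEC loaders below all follow the same indirect-port pattern:
 * reset the ADDR register to 0, stream the ucode dwords through the DATA
 * register, then leave the firmware version in the ADDR register. A
 * hypothetical generic helper capturing the pattern would read:
 *
 * static void gfx_v8_0_upload_ucode_port(struct amdgpu_device *adev,
 *					  u32 addr_reg, u32 data_reg,
 *					  const __le32 *fw_data,
 *					  unsigned fw_size, u32 fw_version)
 * {
 *	unsigned i;
 *
 *	WREG32(addr_reg, 0);
 *	for (i = 0; i < fw_size; i++)
 *		WREG32(data_reg, le32_to_cpup(fw_data++));
 *	WREG32(addr_reg, fw_version);
 * }
 */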
4236
4237static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4238{
4239 int r;
6ae81452 4240 u32 tmp;
4241
4242 gfx_v8_0_rlc_stop(adev);
4243
4244 /* disable CG */
4245 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4246 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4247 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4248 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
2cc0c0b5 4249 if (adev->asic_type == CHIP_POLARIS11 ||
4250 adev->asic_type == CHIP_POLARIS10 ||
4251 adev->asic_type == CHIP_POLARIS12) {
4252 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4253 tmp &= ~0x3;
4254 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4255 }
4256
4257 /* disable PG */
4258 WREG32(mmRLC_PG_CNTL, 0);
4259
4260 gfx_v8_0_rlc_reset(adev);
4261 gfx_v8_0_init_pg(adev);
4262
e61710c5 4263 if (!adev->pp_enabled) {
e635ee07 4264 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4265 /* legacy rlc firmware loading */
4266 r = gfx_v8_0_rlc_load_microcode(adev);
4267 if (r)
4268 return r;
4269 } else {
4270 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4271 AMDGPU_UCODE_ID_RLC_G);
4272 if (r)
4273 return -EINVAL;
4274 }
4275 }
4276
4277 gfx_v8_0_rlc_start(adev);
4278
4279 return 0;
4280}
4281
4282static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4283{
4284 int i;
4285 u32 tmp = RREG32(mmCP_ME_CNTL);
4286
4287 if (enable) {
4288 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4289 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4290 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4291 } else {
4292 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4293 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4294 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4295 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4296 adev->gfx.gfx_ring[i].ready = false;
4297 }
4298 WREG32(mmCP_ME_CNTL, tmp);
4299 udelay(50);
4300}
4301
4302static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4303{
4304 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4305 const struct gfx_firmware_header_v1_0 *ce_hdr;
4306 const struct gfx_firmware_header_v1_0 *me_hdr;
4307 const __le32 *fw_data;
4308 unsigned i, fw_size;
4309
4310 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4311 return -EINVAL;
4312
4313 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4314 adev->gfx.pfp_fw->data;
4315 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4316 adev->gfx.ce_fw->data;
4317 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4318 adev->gfx.me_fw->data;
4319
4320 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4321 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4322 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4323
4324 gfx_v8_0_cp_gfx_enable(adev, false);
4325
4326 /* PFP */
4327 fw_data = (const __le32 *)
4328 (adev->gfx.pfp_fw->data +
4329 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4330 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4331 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4332 for (i = 0; i < fw_size; i++)
4333 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4334 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4335
4336 /* CE */
4337 fw_data = (const __le32 *)
4338 (adev->gfx.ce_fw->data +
4339 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4340 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4341 WREG32(mmCP_CE_UCODE_ADDR, 0);
4342 for (i = 0; i < fw_size; i++)
4343 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4344 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4345
4346 /* ME */
4347 fw_data = (const __le32 *)
4348 (adev->gfx.me_fw->data +
4349 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4350 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4351 WREG32(mmCP_ME_RAM_WADDR, 0);
4352 for (i = 0; i < fw_size; i++)
4353 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4354 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4355
4356 return 0;
4357}
4358
4359static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4360{
4361 u32 count = 0;
4362 const struct cs_section_def *sect = NULL;
4363 const struct cs_extent_def *ext = NULL;
4364
4365 /* begin clear state */
4366 count += 2;
4367 /* context control state */
4368 count += 3;
4369
4370 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4371 for (ext = sect->section; ext->extent != NULL; ++ext) {
4372 if (sect->id == SECT_CONTEXT)
4373 count += 2 + ext->reg_count;
4374 else
4375 return 0;
4376 }
4377 }
4378 /* pa_sc_raster_config/pa_sc_raster_config1 */
4379 count += 4;
4380 /* end clear state */
4381 count += 2;
4382 /* clear state */
4383 count += 2;
4384
4385 return count;
4386}
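/*
 * Editorial note, not part of the original file: a worked example of the
 * count above for a hypothetical clear-state image with two SECT_CONTEXT
 * extents of 10 and 20 registers:
 *
 *	2 (begin) + 3 (context control)
 *	+ (2 + 10) + (2 + 20)	SET_CONTEXT_REG header + payload per extent
 *	+ 4 (raster config) + 2 (end) + 2 (clear state)
 *	= 47 dwords
 *
 * gfx_v8_0_cp_gfx_start() below allocates this count plus 4 dwords for the
 * trailing SET_BASE packet that sets up the CE partitions.
 */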
4387
4388static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4389{
4390 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4391 const struct cs_section_def *sect = NULL;
4392 const struct cs_extent_def *ext = NULL;
4393 int r, i;
4394
4395 /* init the CP */
4396 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4397 WREG32(mmCP_ENDIAN_SWAP, 0);
4398 WREG32(mmCP_DEVICE_ID, 1);
4399
4400 gfx_v8_0_cp_gfx_enable(adev, true);
4401
a27de35c 4402 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4403 if (r) {
4404 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4405 return r;
4406 }
4407
4408 /* clear state buffer */
4409 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4410 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4411
4412 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4413 amdgpu_ring_write(ring, 0x80000000);
4414 amdgpu_ring_write(ring, 0x80000000);
4415
4416 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4417 for (ext = sect->section; ext->extent != NULL; ++ext) {
4418 if (sect->id == SECT_CONTEXT) {
4419 amdgpu_ring_write(ring,
4420 PACKET3(PACKET3_SET_CONTEXT_REG,
4421 ext->reg_count));
4422 amdgpu_ring_write(ring,
4423 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4424 for (i = 0; i < ext->reg_count; i++)
4425 amdgpu_ring_write(ring, ext->extent[i]);
4426 }
4427 }
4428 }
4429
4430 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4431 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4432 switch (adev->asic_type) {
4433 case CHIP_TONGA:
2cc0c0b5 4434 case CHIP_POLARIS10:
4435 amdgpu_ring_write(ring, 0x16000012);
4436 amdgpu_ring_write(ring, 0x0000002A);
4437 break;
2cc0c0b5 4438 case CHIP_POLARIS11:
c4642a47 4439 case CHIP_POLARIS12:
4440 amdgpu_ring_write(ring, 0x16000012);
4441 amdgpu_ring_write(ring, 0x00000000);
4442 break;
4443 case CHIP_FIJI:
4444 amdgpu_ring_write(ring, 0x3a00161a);
4445 amdgpu_ring_write(ring, 0x0000002e);
4446 break;
4447 case CHIP_CARRIZO:
4448 amdgpu_ring_write(ring, 0x00000002);
4449 amdgpu_ring_write(ring, 0x00000000);
4450 break;
4451 case CHIP_TOPAZ:
4452 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4453 0x00000000 : 0x00000002);
4454 amdgpu_ring_write(ring, 0x00000000);
4455 break;
4456 case CHIP_STONEY:
4457 amdgpu_ring_write(ring, 0x00000000);
4458 amdgpu_ring_write(ring, 0x00000000);
4459 break;
4460 default:
4461 BUG();
4462 }
4463
4464 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4465 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4466
4467 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4468 amdgpu_ring_write(ring, 0);
4469
4470 /* init the CE partitions */
4471 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4472 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4473 amdgpu_ring_write(ring, 0x8000);
4474 amdgpu_ring_write(ring, 0x8000);
4475
a27de35c 4476 amdgpu_ring_commit(ring);
4477
4478 return 0;
4479}
4480
4481static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4482{
4483 struct amdgpu_ring *ring;
4484 u32 tmp;
4485 u32 rb_bufsz;
42e8cb50 4486 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4487 int r;
4488
4489 /* Set the write pointer delay */
4490 WREG32(mmCP_RB_WPTR_DELAY, 0);
4491
4492 /* set the RB to use vmid 0 */
4493 WREG32(mmCP_RB_VMID, 0);
4494
4495 /* Set ring buffer size */
4496 ring = &adev->gfx.gfx_ring[0];
4497 rb_bufsz = order_base_2(ring->ring_size / 8);
4498 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4499 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4500 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4501 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4502#ifdef __BIG_ENDIAN
4503 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4504#endif
4505 WREG32(mmCP_RB0_CNTL, tmp);
4506
4507 /* Initialize the ring buffer's read and write pointers */
4508 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4509 ring->wptr = 0;
536fbf94 4510 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4511
4512	/* set the wb address whether it's enabled or not */
4513 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4514 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4515 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4516
4517 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4518 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4519 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4520 mdelay(1);
4521 WREG32(mmCP_RB0_CNTL, tmp);
4522
4523 rb_addr = ring->gpu_addr >> 8;
4524 WREG32(mmCP_RB0_BASE, rb_addr);
4525 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4526
4527 /* no gfx doorbells on iceland */
4528 if (adev->asic_type != CHIP_TOPAZ) {
4529 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4530 if (ring->use_doorbell) {
4531 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4532 DOORBELL_OFFSET, ring->doorbell_index);
4533 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4534 DOORBELL_HIT, 0);
4535 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4536 DOORBELL_EN, 1);
4537 } else {
4538 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4539 DOORBELL_EN, 0);
4540 }
4541 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4542
4543 if (adev->asic_type == CHIP_TONGA) {
4544 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4545 DOORBELL_RANGE_LOWER,
4546 AMDGPU_DOORBELL_GFX_RING0);
4547 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4548
4549 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4550 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4551 }
4552
4553 }
4554
4555 /* start the ring */
f6bd7942 4556 amdgpu_ring_clear_ring(ring);
4557 gfx_v8_0_cp_gfx_start(adev);
4558 ring->ready = true;
4559 r = amdgpu_ring_test_ring(ring);
5003f278 4560 if (r)
aaa36a97 4561 ring->ready = false;
aaa36a97 4562
5003f278 4563 return r;
4564}
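/*
 * Editorial note, not part of the original file: RB_BUFSZ above is a log2
 * encoding in units of 8 bytes. For a hypothetical 64KB gfx ring:
 *
 *	rb_bufsz = order_base_2(65536 / 8) = order_base_2(8192) = 13
 *
 * and RB_BLKSZ is then rb_bufsz - 2 = 11. These figures are derived from
 * the code above, not from hardware documentation.
 */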
4565
4566static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4567{
4568 int i;
4569
4570 if (enable) {
4571 WREG32(mmCP_MEC_CNTL, 0);
4572 } else {
4573 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4574 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4575 adev->gfx.compute_ring[i].ready = false;
fcf17a43 4576 adev->gfx.kiq.ring.ready = false;
4577 }
4578 udelay(50);
4579}
4580
4581static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4582{
4583 const struct gfx_firmware_header_v1_0 *mec_hdr;
4584 const __le32 *fw_data;
4585 unsigned i, fw_size;
4586
4587 if (!adev->gfx.mec_fw)
4588 return -EINVAL;
4589
4590 gfx_v8_0_cp_compute_enable(adev, false);
4591
4592 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4593 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4594
4595 fw_data = (const __le32 *)
4596 (adev->gfx.mec_fw->data +
4597 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4598 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4599
4600 /* MEC1 */
4601 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4602 for (i = 0; i < fw_size; i++)
4603 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4604 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4605
4606 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4607 if (adev->gfx.mec2_fw) {
4608 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4609
4610 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4611 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4612
4613 fw_data = (const __le32 *)
4614 (adev->gfx.mec2_fw->data +
4615 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4616 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4617
4618 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4619 for (i = 0; i < fw_size; i++)
4620 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4621 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4622 }
4623
4624 return 0;
4625}
4626
4627/* KIQ functions */
4628static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4629{
4630 uint32_t tmp;
4631 struct amdgpu_device *adev = ring->adev;
4632
4633 /* tell RLC which is KIQ queue */
4634 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4635 tmp &= 0xffffff00;
4636 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4637 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4638 tmp |= 0x80;
4639 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4640}
4641
346586d5 4642static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4e638ae9 4643{
c3a49ab5 4644 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4645 uint32_t scratch, tmp = 0;
4646 int r, i;
4647
4648 r = amdgpu_gfx_scratch_get(adev, &scratch);
4649 if (r) {
4650 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4651 return r;
4652 }
4653 WREG32(scratch, 0xCAFEDEAD);
4e638ae9 4654
346586d5 4655 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4656 if (r) {
4657 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4658 amdgpu_gfx_scratch_free(adev, scratch);
4659 return r;
4660 }
4661 /* set resources */
4662 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4663 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4664 amdgpu_ring_write(kiq_ring, 0x000000FF); /* queue mask lo */
4665 amdgpu_ring_write(kiq_ring, 0); /* queue mask hi */
4666 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4667 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4668 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4669 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4670 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4671 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4672 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4673 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4674
4675 /* map queues */
4676 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4677		/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
4678 amdgpu_ring_write(kiq_ring,
4679 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4680 amdgpu_ring_write(kiq_ring,
4681 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4682 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4683 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4684 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4685 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4686 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4687 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4688 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4689 }
4690 /* write to scratch for completion */
4691 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4692 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4693 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4e638ae9 4694 amdgpu_ring_commit(kiq_ring);
4695
4696 for (i = 0; i < adev->usec_timeout; i++) {
4697 tmp = RREG32(scratch);
4698 if (tmp == 0xDEADBEEF)
4699 break;
4700 DRM_UDELAY(1);
4701 }
4702 if (i >= adev->usec_timeout) {
4703 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4704 scratch, tmp);
4705 r = -EINVAL;
4706 }
4707 amdgpu_gfx_scratch_free(adev, scratch);
4708
4709 return r;
4710}
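/*
 * Editorial note, not part of the original file: the ring allocation in
 * gfx_v8_0_kiq_kcq_enable() reserves (8 * num_compute_rings) + 11 dwords.
 * Counting the packets actually emitted: SET_RESOURCES is 8 dwords and the
 * scratch SET_UCONFIG_REG handshake is 3, which accounts for the fixed 11;
 * each MAP_QUEUES packet is 7 dwords against the 8 reserved per KCQ.
 */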
4711
4712static int gfx_v8_0_kiq_kcq_disable(struct amdgpu_device *adev)
4713{
4714 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4715 uint32_t scratch, tmp = 0;
4716 int r, i;
4717
4718 r = amdgpu_gfx_scratch_get(adev, &scratch);
4719 if (r) {
4720 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4721 return r;
4722 }
4723 WREG32(scratch, 0xCAFEDEAD);
4724
4725 r = amdgpu_ring_alloc(kiq_ring, 6 + 3);
4726 if (r) {
4727 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4728 amdgpu_gfx_scratch_free(adev, scratch);
4729 return r;
4730 }
4731 /* unmap queues */
4732 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4733 amdgpu_ring_write(kiq_ring,
4734 PACKET3_UNMAP_QUEUES_ACTION(1)| /* RESET_QUEUES */
4735 PACKET3_UNMAP_QUEUES_QUEUE_SEL(2)); /* select all queues */
4736 amdgpu_ring_write(kiq_ring, 0);
4737 amdgpu_ring_write(kiq_ring, 0);
4738 amdgpu_ring_write(kiq_ring, 0);
4739 amdgpu_ring_write(kiq_ring, 0);
4740 /* write to scratch for completion */
4741 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4742 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4743 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4744 amdgpu_ring_commit(kiq_ring);
4745
4746 for (i = 0; i < adev->usec_timeout; i++) {
4747 tmp = RREG32(scratch);
4748 if (tmp == 0xDEADBEEF)
4749 break;
4750 DRM_UDELAY(1);
4751 }
4752 if (i >= adev->usec_timeout) {
4753		DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n",
4754 scratch, tmp);
4755 r = -EINVAL;
4756 }
4757 amdgpu_gfx_scratch_free(adev, scratch);
4758
4759 return r;
4760}
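/*
 * Editorial note, not part of the original file: the 6 + 3 dwords reserved
 * above match the UNMAP_QUEUES packet (header plus 5 payload dwords) and the
 * scratch handshake. QUEUE_SEL(2) selects all queues, so a single packet
 * tears down every KCQ that gfx_v8_0_kiq_kcq_enable() mapped.
 */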
4761
a2140e00 4762static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4e638ae9 4763{
015c2360 4764 struct amdgpu_device *adev = ring->adev;
a2140e00 4765 struct vi_mqd *mqd = ring->mqd_ptr;
4766 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4767 uint32_t tmp;
4768
4769 mqd->header = 0xC0310800;
4770 mqd->compute_pipelinestat_enable = 0x00000001;
4771 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4772 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4773 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4774 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4775 mqd->compute_misc_reserved = 0x00000003;
4776
34534610 4777 eop_base_addr = ring->eop_gpu_addr >> 8;
4778 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4779 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4780
4781 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4782 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4783 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4784 (order_base_2(MEC_HPD_SIZE / 4) - 1));
4785
4786 mqd->cp_hqd_eop_control = tmp;
4787
4788 /* enable doorbell? */
4789 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4790 CP_HQD_PQ_DOORBELL_CONTROL,
4791 DOORBELL_EN,
4792 ring->use_doorbell ? 1 : 0);
4793
4794 mqd->cp_hqd_pq_doorbell_control = tmp;
4795
4796 /* disable the queue if it's active */
4797 mqd->cp_hqd_dequeue_request = 0;
4798 mqd->cp_hqd_pq_rptr = 0;
4799 mqd->cp_hqd_pq_wptr = 0;
4800
4801 /* set the pointer to the MQD */
4802 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4803 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4804
4805 /* set MQD vmid to 0 */
4806 tmp = RREG32(mmCP_MQD_CONTROL);
4807 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4808 mqd->cp_mqd_control = tmp;
4809
4810	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4811 hqd_gpu_addr = ring->gpu_addr >> 8;
4812 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4813 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4814
4815 /* set up the HQD, this is similar to CP_RB0_CNTL */
4816 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4817 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4818 (order_base_2(ring->ring_size / 4) - 1));
4819 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4820 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4821#ifdef __BIG_ENDIAN
4822 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4823#endif
4824 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4825 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4826 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4827 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4828 mqd->cp_hqd_pq_control = tmp;
4829
4830 /* set the wb address whether it's enabled or not */
4831 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4832 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4833 mqd->cp_hqd_pq_rptr_report_addr_hi =
4834 upper_32_bits(wb_gpu_addr) & 0xffff;
4835
4836 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4837 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4838 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4839 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4840
4841 tmp = 0;
4842 /* enable the doorbell if requested */
4843 if (ring->use_doorbell) {
4844 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4845 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4846 DOORBELL_OFFSET, ring->doorbell_index);
4847
4848 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4849 DOORBELL_EN, 1);
4850 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4851 DOORBELL_SOURCE, 0);
4852 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4853 DOORBELL_HIT, 0);
4854 }
4855
4856 mqd->cp_hqd_pq_doorbell_control = tmp;
4857
4858 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4859 ring->wptr = 0;
4860 mqd->cp_hqd_pq_wptr = ring->wptr;
4861 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4862
4863 /* set the vmid for the queue */
4864 mqd->cp_hqd_vmid = 0;
4865
4866 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4867 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4868 mqd->cp_hqd_persistent_state = tmp;
4869
4870 /* set MTYPE */
4871 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4872 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4873 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4874 mqd->cp_hqd_ib_control = tmp;
4875
4876 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4877 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4878 mqd->cp_hqd_iq_timer = tmp;
4879
4880 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4881 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4882 mqd->cp_hqd_ctx_save_control = tmp;
4883
4884 /* activate the queue */
4885 mqd->cp_hqd_active = 1;
4886
4887 return 0;
4888}
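/*
 * Editorial note, not part of the original file: a worked example of the
 * EOP_SIZE encoding above, assuming MEC_HPD_SIZE is 2048 bytes as defined
 * earlier in this file:
 *
 *	order_base_2(2048 / 4) - 1 = order_base_2(512) - 1 = 8
 *
 * and the register's 2^(EOP_SIZE+1) = 2^9 = 512 dwords = 2048 bytes
 * round-trips to the same EOP buffer size.
 */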
4889
a2140e00 4890static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
4e638ae9 4891{
015c2360 4892 struct amdgpu_device *adev = ring->adev;
a2140e00 4893 struct vi_mqd *mqd = ring->mqd_ptr;
4894 int j;
4895
4896 /* disable wptr polling */
0ac642c5 4897 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4898
4899 WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
4900 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
4901
4902 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4903 WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
4904
4905 /* enable doorbell? */
4906 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
4907
4908 /* disable the queue if it's active */
699d12b7 4909 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4910 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4911 for (j = 0; j < adev->usec_timeout; j++) {
699d12b7 4912 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4913 break;
4914 udelay(1);
4915 }
4916 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4917 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4918 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4919 }
4920
4921 /* set the pointer to the MQD */
4922 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4923 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4924
4925 /* set MQD vmid to 0 */
4926 WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
4927
4928	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4929 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4930 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4931
4932 /* set up the HQD, this is similar to CP_RB0_CNTL */
4933 WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
4934
4935 /* set the wb address whether it's enabled or not */
4936 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4937 mqd->cp_hqd_pq_rptr_report_addr_lo);
4938 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4939 mqd->cp_hqd_pq_rptr_report_addr_hi);
4940
4941 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4942 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
4943 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
4944
4945 /* enable the doorbell if requested */
4946 if (ring->use_doorbell) {
4947 if ((adev->asic_type == CHIP_CARRIZO) ||
4948 (adev->asic_type == CHIP_FIJI) ||
4949 (adev->asic_type == CHIP_STONEY) ||
4950 (adev->asic_type == CHIP_POLARIS10) ||
4951 (adev->asic_type == CHIP_POLARIS11) ||
4952 (adev->asic_type == CHIP_POLARIS12)) {
4953 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4954 AMDGPU_DOORBELL_KIQ << 2);
4955 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4956 AMDGPU_DOORBELL_MEC_RING7 << 2);
4957 }
4958 }
4959 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
4960
4961 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4962 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4963
4964 /* set the vmid for the queue */
4965 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4966
4967 WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
4968
4969 /* activate the queue */
4970 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4971
4972 if (ring->use_doorbell)
4973 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4974
4975 return 0;
4976}
4977
a2140e00 4978static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4979{
4980 struct amdgpu_device *adev = ring->adev;
a2140e00 4981 struct vi_mqd *mqd = ring->mqd_ptr;
1fb37a3d 4982 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4e638ae9 4983
39300115 4984 gfx_v8_0_kiq_setting(ring);
4e638ae9 4985
b4e40676 4986 if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
4987 memset((void *)mqd, 0, sizeof(*mqd));
4988 mutex_lock(&adev->srbm_mutex);
4989 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
a2140e00 4990 gfx_v8_0_mqd_init(ring);
39300115 4991 gfx_v8_0_kiq_init_register(ring);
4992 vi_srbm_select(adev, 0, 0, 0, 0);
4993 mutex_unlock(&adev->srbm_mutex);
4994
4995 if (adev->gfx.mec.mqd_backup[mqd_idx])
4996 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4997 } else { /* for GPU_RESET case */
4998 /* reset MQD to a clean status */
4999 if (adev->gfx.mec.mqd_backup[mqd_idx])
5000 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
5001
5002 /* reset ring buffer */
5003 ring->wptr = 0;
5004 amdgpu_ring_clear_ring(ring);
5005
5006 mutex_lock(&adev->srbm_mutex);
5007 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5008 gfx_v8_0_kiq_init_register(ring);
5009 vi_srbm_select(adev, 0, 0, 0, 0);
5010 mutex_unlock(&adev->srbm_mutex);
1fb37a3d 5011 }
4e638ae9 5012
346586d5 5013 return 0;
5014}
5015
5016static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
5017{
5018 struct amdgpu_device *adev = ring->adev;
5019 struct vi_mqd *mqd = ring->mqd_ptr;
5020 int mqd_idx = ring - &adev->gfx.compute_ring[0];
5021
5022 if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
5023 memset((void *)mqd, 0, sizeof(*mqd));
5024 mutex_lock(&adev->srbm_mutex);
5025 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5026 gfx_v8_0_mqd_init(ring);
5027 vi_srbm_select(adev, 0, 0, 0, 0);
5028 mutex_unlock(&adev->srbm_mutex);
5029
5030 if (adev->gfx.mec.mqd_backup[mqd_idx])
5031 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
5032 } else { /* for GPU_RESET case */
5033 /* reset MQD to a clean status */
5034 if (adev->gfx.mec.mqd_backup[mqd_idx])
5035 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
5036
5037 /* reset ring buffer */
5038 ring->wptr = 0;
5039 amdgpu_ring_clear_ring(ring);
5040 }
5041
c3a49ab5 5042 return 0;
5043}
5044
596c67d0 5045static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
5046{
5047 struct amdgpu_ring *ring = NULL;
596c67d0 5048 int r = 0, i;
4e638ae9 5049
596c67d0 5050 gfx_v8_0_cp_compute_enable(adev, true);
5051
5052 ring = &adev->gfx.kiq.ring;
5053
5054 r = amdgpu_bo_reserve(ring->mqd_obj, false);
5055 if (unlikely(r != 0))
5056 goto done;
5057
5058 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
5059 if (!r) {
a2140e00 5060 r = gfx_v8_0_kiq_init_queue(ring);
596c67d0 5061 amdgpu_bo_kunmap(ring->mqd_obj);
1fb37a3d 5062 ring->mqd_ptr = NULL;
4e638ae9 5063 }
5064 amdgpu_bo_unreserve(ring->mqd_obj);
5065 if (r)
5066 goto done;
4e638ae9 5067
5068 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5069 ring = &adev->gfx.compute_ring[i];
5070
5071 r = amdgpu_bo_reserve(ring->mqd_obj, false);
5072 if (unlikely(r != 0))
5073 goto done;
5074 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
5075 if (!r) {
39300115 5076 r = gfx_v8_0_kcq_init_queue(ring);
596c67d0 5077 amdgpu_bo_kunmap(ring->mqd_obj);
1fb37a3d 5078 ring->mqd_ptr = NULL;
596c67d0 5079 }
5080 amdgpu_bo_unreserve(ring->mqd_obj);
5081 if (r)
5082 goto done;
c3a49ab5 5083 }
4e638ae9 5084
346586d5 5085 r = gfx_v8_0_kiq_kcq_enable(adev);
5086 if (r)
5087 goto done;
5088
5089 /* Test KIQ */
5090 ring = &adev->gfx.kiq.ring;
5091 ring->ready = true;
5092 r = amdgpu_ring_test_ring(ring);
5093 if (r) {
5094 ring->ready = false;
5095 goto done;
5096 }
5097
5098 /* Test KCQs */
5099 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5100 ring = &adev->gfx.compute_ring[i];
5101 ring->ready = true;
5102 r = amdgpu_ring_test_ring(ring);
5103 if (r)
5104 ring->ready = false;
5105 }
5106
5107done:
5108 return r;
5109}
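/*
 * Editorial sketch, not part of the original file: the KIQ and KCQ branches
 * above repeat the same reserve/kmap/init/kunmap/unreserve dance on the MQD
 * BO. A hypothetical helper capturing that pattern:
 *
 * static int gfx_v8_0_map_and_init_mqd(struct amdgpu_ring *ring,
 *					int (*init)(struct amdgpu_ring *ring))
 * {
 *	int r;
 *
 *	r = amdgpu_bo_reserve(ring->mqd_obj, false);
 *	if (unlikely(r != 0))
 *		return r;
 *	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
 *	if (!r) {
 *		r = init(ring);
 *		amdgpu_bo_kunmap(ring->mqd_obj);
 *		ring->mqd_ptr = NULL;
 *	}
 *	amdgpu_bo_unreserve(ring->mqd_obj);
 *	return r;
 * }
 */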
5110
5111static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5112{
5113 int r;
5114
e3c7656c 5115 if (!(adev->flags & AMD_IS_APU))
5116 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5117
e61710c5 5118 if (!adev->pp_enabled) {
e635ee07 5119 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
5120 /* legacy firmware loading */
5121 r = gfx_v8_0_cp_gfx_load_microcode(adev);
5122 if (r)
5123 return r;
aaa36a97 5124
5125 r = gfx_v8_0_cp_compute_load_microcode(adev);
5126 if (r)
5127 return r;
5128 } else {
5129 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5130 AMDGPU_UCODE_ID_CP_CE);
5131 if (r)
5132 return -EINVAL;
5133
5134 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5135 AMDGPU_UCODE_ID_CP_PFP);
5136 if (r)
5137 return -EINVAL;
5138
5139 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5140 AMDGPU_UCODE_ID_CP_ME);
5141 if (r)
5142 return -EINVAL;
5143
5144 if (adev->asic_type == CHIP_TOPAZ) {
5145 r = gfx_v8_0_cp_compute_load_microcode(adev);
5146 if (r)
5147 return r;
5148 } else {
5149 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5150 AMDGPU_UCODE_ID_CP_MEC1);
5151 if (r)
5152 return -EINVAL;
5153 }
ba5c2a87 5154 }
5155 }
5156
5157 r = gfx_v8_0_cp_gfx_resume(adev);
5158 if (r)
5159 return r;
5160
b4e40676 5161 r = gfx_v8_0_kiq_resume(adev);
5162 if (r)
5163 return r;
5164
5165 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5166
5167 return 0;
5168}
5169
5170static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5171{
5172 gfx_v8_0_cp_gfx_enable(adev, enable);
5173 gfx_v8_0_cp_compute_enable(adev, enable);
5174}
5175
5fc3aeeb 5176static int gfx_v8_0_hw_init(void *handle)
5177{
5178 int r;
5fc3aeeb 5179 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5180
5181 gfx_v8_0_init_golden_registers(adev);
5182 gfx_v8_0_gpu_init(adev);
5183
5184 r = gfx_v8_0_rlc_resume(adev);
5185 if (r)
5186 return r;
5187
5188 r = gfx_v8_0_cp_resume(adev);
5189
5190 return r;
5191}
5192
5fc3aeeb 5193static int gfx_v8_0_hw_fini(void *handle)
aaa36a97 5194{
5fc3aeeb 5195 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5196
5197 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5198 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5199 if (amdgpu_sriov_vf(adev)) {
5200		pr_debug("For SRIOV client, nothing to do in hw_fini.\n");
5201 return 0;
5202 }
9d11ca9c 5203 gfx_v8_0_kiq_kcq_disable(adev);
5204 gfx_v8_0_cp_enable(adev, false);
5205 gfx_v8_0_rlc_stop(adev);
aaa36a97 5206
5207 amdgpu_set_powergating_state(adev,
5208 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5209
5210 return 0;
5211}
5212
5fc3aeeb 5213static int gfx_v8_0_suspend(void *handle)
aaa36a97 5214{
5fc3aeeb 5215 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
b4e40676 5216 adev->gfx.in_suspend = true;
5217 return gfx_v8_0_hw_fini(adev);
5218}
5219
5fc3aeeb 5220static int gfx_v8_0_resume(void *handle)
aaa36a97 5221{
b4e40676 5222 int r;
5fc3aeeb 5223 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5224
5225 r = gfx_v8_0_hw_init(adev);
5226 adev->gfx.in_suspend = false;
5227 return r;
5228}
5229
5fc3aeeb 5230static bool gfx_v8_0_is_idle(void *handle)
aaa36a97 5231{
5fc3aeeb 5232 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5233
5234 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5235 return false;
5236 else
5237 return true;
5238}
5239
5fc3aeeb 5240static int gfx_v8_0_wait_for_idle(void *handle)
5241{
5242 unsigned i;
5fc3aeeb 5243 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5244
5245 for (i = 0; i < adev->usec_timeout; i++) {
5003f278 5246 if (gfx_v8_0_is_idle(handle))
aaa36a97 5247 return 0;
5003f278 5248
5249 udelay(1);
5250 }
5251 return -ETIMEDOUT;
5252}
5253
da146d3b 5254static bool gfx_v8_0_check_soft_reset(void *handle)
aaa36a97 5255{
3d7c6384 5256 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5257 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5258 u32 tmp;
5259
5260 /* GRBM_STATUS */
5261 tmp = RREG32(mmGRBM_STATUS);
5262 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5263 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5264 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5265 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5266 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5267 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5268 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5269 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5270 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5271 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5272 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5273 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5274 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5275 }
5276
5277 /* GRBM_STATUS2 */
5278 tmp = RREG32(mmGRBM_STATUS2);
5279 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5280 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5281 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5282
5283 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5284 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5285 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5286 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5287 SOFT_RESET_CPF, 1);
5288 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5289 SOFT_RESET_CPC, 1);
5290 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5291 SOFT_RESET_CPG, 1);
5292 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5293 SOFT_RESET_GRBM, 1);
5294 }
5295
5296 /* SRBM_STATUS */
5297 tmp = RREG32(mmSRBM_STATUS);
5298 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5299 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5300 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5301 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5302 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5303 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5304
5305 if (grbm_soft_reset || srbm_soft_reset) {
5306 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5307 adev->gfx.srbm_soft_reset = srbm_soft_reset;
da146d3b 5308 return true;
3d7c6384 5309 } else {
5310 adev->gfx.grbm_soft_reset = 0;
5311 adev->gfx.srbm_soft_reset = 0;
da146d3b 5312 return false;
3d7c6384 5313 }
3d7c6384 5314}
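/*
 * Editorial note, not part of the original file: this check only computes
 * and caches the reset masks in adev->gfx.grbm_soft_reset and
 * adev->gfx.srbm_soft_reset; gfx_v8_0_pre_soft_reset(), gfx_v8_0_soft_reset()
 * and gfx_v8_0_post_soft_reset() below consume the cached values, so all
 * three stages act on the same snapshot of the busy bits.
 */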
aaa36a97 5315
5316static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
5317 struct amdgpu_ring *ring)
5318{
5319 int i;
5320
d1a5b250 5321 mutex_lock(&adev->srbm_mutex);
5322 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5323 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
35e259d5 5324 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, 2);
5325 for (i = 0; i < adev->usec_timeout; i++) {
5326 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
5327 break;
5328 udelay(1);
5329 }
5330 }
5331 vi_srbm_select(adev, 0, 0, 0, 0);
5332 mutex_unlock(&adev->srbm_mutex);
5333}
5334
5335static int gfx_v8_0_pre_soft_reset(void *handle)
5336{
5337 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5338 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5339
5340 if ((!adev->gfx.grbm_soft_reset) &&
5341 (!adev->gfx.srbm_soft_reset))
5342 return 0;
5343
5344 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5345 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5346
5347 /* stop the rlc */
5348 gfx_v8_0_rlc_stop(adev);
5349
5350 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5351 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5352 /* Disable GFX parsing/prefetching */
5353 gfx_v8_0_cp_gfx_enable(adev, false);
5354
5355 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5356 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5357 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5358 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5359 int i;
5360
5361 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5362 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5363
5364 gfx_v8_0_inactive_hqd(adev, ring);
5365 }
aaa36a97 5366 /* Disable MEC parsing/prefetching */
7776a693 5367 gfx_v8_0_cp_compute_enable(adev, false);
1057f20c 5368 }
7776a693 5369
5370 return 0;
5371}
7776a693 5372
5373static int gfx_v8_0_soft_reset(void *handle)
5374{
5375 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5376 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5377 u32 tmp;
aaa36a97 5378
5379 if ((!adev->gfx.grbm_soft_reset) &&
5380 (!adev->gfx.srbm_soft_reset))
3d7c6384 5381 return 0;
aaa36a97 5382
5383 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5384 srbm_soft_reset = adev->gfx.srbm_soft_reset;
aaa36a97 5385
5386 if (grbm_soft_reset || srbm_soft_reset) {
5387 tmp = RREG32(mmGMCON_DEBUG);
5388 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5389 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5390 WREG32(mmGMCON_DEBUG, tmp);
5391 udelay(50);
5392 }
aaa36a97 5393
5394 if (grbm_soft_reset) {
5395 tmp = RREG32(mmGRBM_SOFT_RESET);
5396 tmp |= grbm_soft_reset;
5397 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5398 WREG32(mmGRBM_SOFT_RESET, tmp);
5399 tmp = RREG32(mmGRBM_SOFT_RESET);
aaa36a97 5400
3d7c6384 5401 udelay(50);
aaa36a97 5402
5403 tmp &= ~grbm_soft_reset;
5404 WREG32(mmGRBM_SOFT_RESET, tmp);
5405 tmp = RREG32(mmGRBM_SOFT_RESET);
5406 }
7776a693 5407
5408 if (srbm_soft_reset) {
5409 tmp = RREG32(mmSRBM_SOFT_RESET);
5410 tmp |= srbm_soft_reset;
5411 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5412 WREG32(mmSRBM_SOFT_RESET, tmp);
5413 tmp = RREG32(mmSRBM_SOFT_RESET);
7776a693 5414
aaa36a97 5415 udelay(50);
7776a693 5416
5417 tmp &= ~srbm_soft_reset;
5418 WREG32(mmSRBM_SOFT_RESET, tmp);
5419 tmp = RREG32(mmSRBM_SOFT_RESET);
aaa36a97 5420 }
7776a693 5421
5422 if (grbm_soft_reset || srbm_soft_reset) {
5423 tmp = RREG32(mmGMCON_DEBUG);
5424 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5425 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5426 WREG32(mmGMCON_DEBUG, tmp);
aaa36a97 5427 }
5428
5429 /* Wait a little for things to settle down */
5430 udelay(50);
5431
5432 return 0;
5433}
5434
5435static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
5436 struct amdgpu_ring *ring)
5437{
d1a5b250 5438 mutex_lock(&adev->srbm_mutex);
5439 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5440 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5441 WREG32(mmCP_HQD_PQ_RPTR, 0);
5442 WREG32(mmCP_HQD_PQ_WPTR, 0);
5443 vi_srbm_select(adev, 0, 0, 0, 0);
d1a5b250 5444 mutex_unlock(&adev->srbm_mutex);
5445}
5446
5447static int gfx_v8_0_post_soft_reset(void *handle)
5448{
5449 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5450 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5451
5452 if ((!adev->gfx.grbm_soft_reset) &&
5453 (!adev->gfx.srbm_soft_reset))
5454 return 0;
5455
5456 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5457 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5458
5459 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5460 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5461 gfx_v8_0_cp_gfx_resume(adev);
5462
5463 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5464 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5465 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5466 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5467 int i;
5468
5469 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5470 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5471
5472 gfx_v8_0_init_hqd(adev, ring);
5473 }
b4e40676 5474 gfx_v8_0_kiq_resume(adev);
5475 }
5476 gfx_v8_0_rlc_start(adev);
5477
5478 return 0;
5479}
5480
5481/**
5482 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5483 *
5484 * @adev: amdgpu_device pointer
5485 *
5486 * Fetches a GPU clock counter snapshot.
5487 * Returns the 64 bit clock counter snapshot.
5488 */
b95e31fd 5489static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5490{
5491 uint64_t clock;
5492
5493 mutex_lock(&adev->gfx.gpu_clock_mutex);
5494 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5495 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5496 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5497 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5498 return clock;
5499}
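/*
 * Editorial sketch, not part of the original file: a hypothetical caller
 * measuring elapsed GPU clocks around a workload:
 *
 *	uint64_t t0 = gfx_v8_0_get_gpu_clock_counter(adev);
 *	... submit work and wait for it ...
 *	uint64_t cycles = gfx_v8_0_get_gpu_clock_counter(adev) - t0;
 *
 * The write to mmRLC_CAPTURE_GPU_CLOCK_COUNT appears to latch the counter so
 * the LSB/MSB halves can be read back-to-back without tearing, which is also
 * why the sequence is serialized under gpu_clock_mutex.
 */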
5500
5501static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5502 uint32_t vmid,
5503 uint32_t gds_base, uint32_t gds_size,
5504 uint32_t gws_base, uint32_t gws_size,
5505 uint32_t oa_base, uint32_t oa_size)
5506{
5507 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5508 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5509
5510 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5511 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5512
5513 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5514 oa_size = oa_size >> AMDGPU_OA_SHIFT;
5515
5516 /* GDS Base */
5517 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5518 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5519 WRITE_DATA_DST_SEL(0)));
5520 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5521 amdgpu_ring_write(ring, 0);
5522 amdgpu_ring_write(ring, gds_base);
5523
5524 /* GDS Size */
5525 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5526 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5527 WRITE_DATA_DST_SEL(0)));
5528 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5529 amdgpu_ring_write(ring, 0);
5530 amdgpu_ring_write(ring, gds_size);
5531
5532 /* GWS */
5533 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5534 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5535 WRITE_DATA_DST_SEL(0)));
5536 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5537 amdgpu_ring_write(ring, 0);
5538 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5539
5540 /* OA */
5541 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5542 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5543 WRITE_DATA_DST_SEL(0)));
5544 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5545 amdgpu_ring_write(ring, 0);
5546 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5547}
5548
5549static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5550{
5551 WREG32(mmSQ_IND_INDEX,
5552 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5553 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5554 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5555 (SQ_IND_INDEX__FORCE_READ_MASK));
5556 return RREG32(mmSQ_IND_DATA);
5557}
5558
5559static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5560 uint32_t wave, uint32_t thread,
5561 uint32_t regno, uint32_t num, uint32_t *out)
5562{
5563 WREG32(mmSQ_IND_INDEX,
5564 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5565 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5566 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5567 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5568 (SQ_IND_INDEX__FORCE_READ_MASK) |
5569 (SQ_IND_INDEX__AUTO_INCR_MASK));
5570 while (num--)
5571 *(out++) = RREG32(mmSQ_IND_DATA);
5572}
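/*
 * Editorial note, not part of the original file: SQ_IND_INDEX packs the
 * wave, SIMD, thread and register selectors into a single dword. For a
 * hypothetical read of four consecutive registers starting at regno 0x200
 * from wave 2 on simd 1, thread 0, the index word would be
 *
 *	(2 << SQ_IND_INDEX__WAVE_ID__SHIFT) |
 *	(1 << SQ_IND_INDEX__SIMD_ID__SHIFT) |
 *	(0x200 << SQ_IND_INDEX__INDEX__SHIFT) |
 *	SQ_IND_INDEX__FORCE_READ_MASK | SQ_IND_INDEX__AUTO_INCR_MASK
 *
 * after which four back-to-back RREG32(mmSQ_IND_DATA) reads return the
 * consecutive registers, courtesy of AUTO_INCR.
 */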
5573
5574static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5575{
5576 /* type 0 wave data */
5577 dst[(*no_fields)++] = 0;
5578 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5579 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5580 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5581 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5582 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5583 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5584 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5585 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5586 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5587 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5588 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5589 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5590 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5591 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5592 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5593 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5594 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5595 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5596}
5597
5598static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5599 uint32_t wave, uint32_t start,
5600 uint32_t size, uint32_t *dst)
5601{
5602 wave_read_regs(
5603 adev, simd, wave, 0,
5604 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5605}
5606
472259f0 5607
5608static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5609 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
05fb7291 5610 .select_se_sh = &gfx_v8_0_select_se_sh,
472259f0 5611 .read_wave_data = &gfx_v8_0_read_wave_data,
c5a60ce8 5612 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5613};
5614
5fc3aeeb 5615static int gfx_v8_0_early_init(void *handle)
aaa36a97 5616{
5fc3aeeb 5617 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5618
5619 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5620 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
b95e31fd 5621 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5622 gfx_v8_0_set_ring_funcs(adev);
5623 gfx_v8_0_set_irq_funcs(adev);
5624 gfx_v8_0_set_gds_init(adev);
dbff57bc 5625 gfx_v8_0_set_rlc_funcs(adev);
5626
5627 return 0;
5628}
5629
5630static int gfx_v8_0_late_init(void *handle)
5631{
5632 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5633 int r;
5634
5635 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5636 if (r)
5637 return r;
5638
5639 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5640 if (r)
5641 return r;
5642
5643 /* requires IBs so do in late init after IB pool is initialized */
5644 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5645 if (r)
5646 return r;
5647
5648 amdgpu_set_powergating_state(adev,
5649 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5650
5651 return 0;
5652}
5653
5654static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5655 bool enable)
62a86fc2 5656{
5657 if ((adev->asic_type == CHIP_POLARIS11) ||
5658 (adev->asic_type == CHIP_POLARIS12))
5659 /* Send msg to SMU via Powerplay */
5660 amdgpu_set_powergating_state(adev,
5661 AMD_IP_BLOCK_TYPE_SMC,
5662 enable ?
5663 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
62a86fc2 5664
61cb8cef 5665 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5666}
5667
5668static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5669 bool enable)
62a86fc2 5670{
61cb8cef 5671 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5672}
5673
2cc0c0b5 5674static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5675 bool enable)
5676{
61cb8cef 5677 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5678}
5679
5680static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5681 bool enable)
5682{
61cb8cef 5683 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5684}
5685
5686static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5687 bool enable)
5688{
61cb8cef 5689 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5690
5691 /* Read any GFX register to wake up GFX. */
5692 if (!enable)
61cb8cef 5693 RREG32(mmDB_RENDER_CONTROL);
5694}
5695
5696static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5697 bool enable)
5698{
5699 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5700 cz_enable_gfx_cg_power_gating(adev, true);
5701 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5702 cz_enable_gfx_pipeline_power_gating(adev, true);
5703 } else {
5704 cz_enable_gfx_cg_power_gating(adev, false);
5705 cz_enable_gfx_pipeline_power_gating(adev, false);
5706 }
5707}
5708
5fc3aeeb 5709static int gfx_v8_0_set_powergating_state(void *handle,
5710 enum amd_powergating_state state)
aaa36a97 5711{
62a86fc2 5712 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7e913664 5713 bool enable = (state == AMD_PG_STATE_GATE);
62a86fc2 5714
5715 if (amdgpu_sriov_vf(adev))
5716 return 0;
5717
62a86fc2 5718 switch (adev->asic_type) {
5719 case CHIP_CARRIZO:
5720 case CHIP_STONEY:
ad1830d5 5721
5722 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5723 cz_enable_sck_slow_down_on_power_up(adev, true);
5724 cz_enable_sck_slow_down_on_power_down(adev, true);
5725 } else {
5726 cz_enable_sck_slow_down_on_power_up(adev, false);
5727 cz_enable_sck_slow_down_on_power_down(adev, false);
5728 }
5729 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5730 cz_enable_cp_power_gating(adev, true);
5731 else
5732 cz_enable_cp_power_gating(adev, false);
5733
ad1830d5 5734 cz_update_gfx_cg_power_gating(adev, enable);
5735
5736 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5737 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5738 else
5739 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5740
5741 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5742 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5743 else
5744 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5745 break;
2cc0c0b5 5746 case CHIP_POLARIS11:
c4642a47 5747 case CHIP_POLARIS12:
5748 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5749 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5750 else
5751 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5752
5753 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5754 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5755 else
5756 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5757
5758 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5759 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
62a86fc2 5760 else
7ba0eb6d 5761 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5762 break;
5763 default:
5764 break;
5765 }
5766
5767 return 0;
5768}
5769
5770static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5771{
5772 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5773 int data;
5774
5775 if (amdgpu_sriov_vf(adev))
5776 *flags = 0;
5777
5778 /* AMD_CG_SUPPORT_GFX_MGCG */
5779 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5780 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5781 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5782
5783	/* AMD_CG_SUPPORT_GFX_CGCG */
5784 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5785 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5786 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5787
5788 /* AMD_CG_SUPPORT_GFX_CGLS */
5789 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5790 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5791
5792 /* AMD_CG_SUPPORT_GFX_CGTS */
5793 data = RREG32(mmCGTS_SM_CTRL_REG);
5794 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5795 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5796
5797 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5798 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5799 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5800
5801 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5802 data = RREG32(mmRLC_MEM_SLP_CNTL);
5803 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5804 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5805
5806 /* AMD_CG_SUPPORT_GFX_CP_LS */
5807 data = RREG32(mmCP_MEM_SLP_CNTL);
5808 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5809 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5810}
5811
79deaaf4 5812static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
14698b6c 5813 uint32_t reg_addr, uint32_t cmd)
5814{
5815 uint32_t data;
5816
9559ef5b 5817 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5818
5819 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5820 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5821
5822 data = RREG32(mmRLC_SERDES_WR_CTRL);
146f256f 5823 if (adev->asic_type == CHIP_STONEY)
62d2ce4b
TSD
5824 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5825 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5826 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5827 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5828 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5829 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5830 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5831 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5832 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
146f256f
AD
5833 else
5834 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5835 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5836 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5837 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5838 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5839 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5840 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5841 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5842 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5843 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5844 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
6e378858 5845 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
146f256f
AD
5846 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5847 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5848 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
6e378858
EH
5849
5850 WREG32(mmRLC_SERDES_WR_CTRL, data);
5851}
5852
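/*
 * Usage sketch, as seen in the clock-gating update functions below: a BPM
 * register is paired with a set/clear command and broadcast to all CUs
 * (se/sh selection and both serdes master masks are 0xffffffff above):
 *
 *	gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
 *	gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
 */
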
dbff57bc
AD
5853#define MSG_ENTER_RLC_SAFE_MODE 1
5854#define MSG_EXIT_RLC_SAFE_MODE 0
61cb8cef
TSD
5855#define RLC_GPR_REG2__REQ_MASK 0x00000001
5856#define RLC_GPR_REG2__REQ__SHIFT 0
5857#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5858#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
dbff57bc 5859
dbff57bc
AD
5860static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5861{
5862 u32 data;
5863 unsigned i;
5864
5865 data = RREG32(mmRLC_CNTL);
5866 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5867 return;
5868
5869 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5870 data |= RLC_SAFE_MODE__CMD_MASK;
5871 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5872 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5873 WREG32(mmRLC_SAFE_MODE, data);
5874
5875 for (i = 0; i < adev->usec_timeout; i++) {
5876 if ((RREG32(mmRLC_GPM_STAT) &
5877 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5878 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5879 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5880 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5881 break;
5882 udelay(1);
5883 }
5884
5885 for (i = 0; i < adev->usec_timeout; i++) {
61cb8cef 5886 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
dbff57bc
AD
5887 break;
5888 udelay(1);
5889 }
5890 adev->gfx.rlc.in_safe_mode = true;
5891 }
5892}
5893
5894static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5895{
5896 u32 data = 0;
5897 unsigned i;
5898
5899 data = RREG32(mmRLC_CNTL);
5900 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5901 return;
5902
5903 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5904 if (adev->gfx.rlc.in_safe_mode) {
5905 data |= RLC_SAFE_MODE__CMD_MASK;
5906 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5907 WREG32(mmRLC_SAFE_MODE, data);
5908 adev->gfx.rlc.in_safe_mode = false;
5909 }
5910 }
5911
5912 for (i = 0; i < adev->usec_timeout; i++) {
61cb8cef 5913 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
dbff57bc
AD
5914 break;
5915 udelay(1);
5916 }
5917}
5918
dbff57bc
AD
5919static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5920 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5921 .exit_safe_mode = iceland_exit_rlc_safe_mode
5922};
5923
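/*
 * These hooks bracket clock-gating register updates; the pattern used by
 * the update functions below is:
 *
 *	adev->gfx.rlc.funcs->enter_safe_mode(adev);
 *	... reprogram RLC/CGTS/CGCG registers ...
 *	adev->gfx.rlc.funcs->exit_safe_mode(adev);
 *
 * so the RLC sits parked in safe mode while the CG state changes under it.
 */
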
dbff57bc
AD
5924static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5925 bool enable)
6e378858
EH
5926{
5927 uint32_t temp, data;
5928
dbff57bc
AD
5929 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5930
6e378858 5931 /* It is disabled by HW by default */
14698b6c
AD
5932 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5933 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
61cb8cef 5934 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
14698b6c 5935 /* 1 - RLC memory Light sleep */
61cb8cef 5936 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
6e378858 5937
61cb8cef
TSD
5938 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5939 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
14698b6c 5940 }
6e378858
EH
5941
5942 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5943 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
dbff57bc
AD
5944 if (adev->flags & AMD_IS_APU)
5945 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5946 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5947 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5948 else
5949 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5950 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5951 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5952 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
6e378858
EH
5953
5954 if (temp != data)
5955 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5956
5957 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5958 gfx_v8_0_wait_for_rlc_serdes(adev);
5959
5960 /* 5 - clear mgcg override */
79deaaf4 5961 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6e378858 5962
14698b6c
AD
5963 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5964 /* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */
5965 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5966 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5967 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5968 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5969 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5970 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5971 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5972 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5973 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5974 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5975 if (temp != data)
5976 WREG32(mmCGTS_SM_CTRL_REG, data);
5977 }
6e378858
EH
5978 udelay(50);
5979
5980 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5981 gfx_v8_0_wait_for_rlc_serdes(adev);
5982 } else {
5983 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5984 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5985 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5986 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5987 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5988 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5989 if (temp != data)
5990 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5991
5992 /* 2 - disable MGLS in RLC */
5993 data = RREG32(mmRLC_MEM_SLP_CNTL);
5994 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5995 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5996 WREG32(mmRLC_MEM_SLP_CNTL, data);
5997 }
5998
5999 /* 3 - disable MGLS in CP */
6000 data = RREG32(mmCP_MEM_SLP_CNTL);
6001 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
6002 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
6003 WREG32(mmCP_MEM_SLP_CNTL, data);
6004 }
6005
6006 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
6007 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
6008 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
6009 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
6010 if (temp != data)
6011 WREG32(mmCGTS_SM_CTRL_REG, data);
6012
6013 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6014 gfx_v8_0_wait_for_rlc_serdes(adev);
6015
6016 /* 6 - set mgcg override */
79deaaf4 6017 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6e378858
EH
6018
6019 udelay(50);
6020
6021 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6022 gfx_v8_0_wait_for_rlc_serdes(adev);
6023 }
dbff57bc
AD
6024
6025 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6e378858
EH
6026}
6027
dbff57bc
AD
6028static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
6029 bool enable)
6e378858
EH
6030{
6031 uint32_t temp, temp1, data, data1;
6032
6033 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
6034
dbff57bc
AD
6035 adev->gfx.rlc.funcs->enter_safe_mode(adev);
6036
14698b6c 6037 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
6e378858
EH
6038 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6039 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
6040 if (temp1 != data1)
6041 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6042
dd31ae9a 6043 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6e378858
EH
6044 gfx_v8_0_wait_for_rlc_serdes(adev);
6045
dd31ae9a 6046 /* 2 - clear cgcg override */
79deaaf4 6047 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6e378858
EH
6048
6049 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6050 gfx_v8_0_wait_for_rlc_serdes(adev);
6051
dd31ae9a 6052 /* 3 - write cmd to set CGLS */
79deaaf4 6053 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
6e378858 6054
dd31ae9a 6055 /* 4 - enable cgcg */
6e378858
EH
6056 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
6057
14698b6c
AD
6058 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6059 /* enable cgls*/
6060 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6e378858 6061
14698b6c
AD
6062 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6063 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
6e378858 6064
14698b6c
AD
6065 if (temp1 != data1)
6066 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6067 } else {
6068 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6069 }
6e378858
EH
6070
6071 if (temp != data)
6072 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
dd31ae9a
AN
6073
6074 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
6075 * Cmp_busy/GFX_Idle interrupts
6076 */
6077 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6e378858
EH
6078 } else {
6079 /* disable cntx_empty_int_enable & GFX Idle interrupt */
6080 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
6081
6082 /* TEST CGCG */
6083 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6084 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
6085 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
6086 if (temp1 != data1)
6087 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6088
6089 /* read gfx register to wake up cgcg */
6090 RREG32(mmCB_CGTT_SCLK_CTRL);
6091 RREG32(mmCB_CGTT_SCLK_CTRL);
6092 RREG32(mmCB_CGTT_SCLK_CTRL);
6093 RREG32(mmCB_CGTT_SCLK_CTRL);
6094
6095 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6096 gfx_v8_0_wait_for_rlc_serdes(adev);
6097
6098 /* write cmd to Set CGCG Override */
79deaaf4 6099 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6e378858
EH
6100
6101 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6102 gfx_v8_0_wait_for_rlc_serdes(adev);
6103
6104 /* write cmd to Clear CGLS */
79deaaf4 6105 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6e378858
EH
6106
6107 /* disable cgcg, cgls should be disabled too. */
6108 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
14698b6c 6109 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6e378858
EH
6110 if (temp != data)
6111 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6112 }
dbff57bc 6113
7894745a
TSD
6114 gfx_v8_0_wait_for_rlc_serdes(adev);
6115
dbff57bc 6116 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6e378858 6117}
dbff57bc
AD
6118static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6119 bool enable)
6e378858
EH
6120{
6121 if (enable) {
6122 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6123 * === MGCG + MGLS + TS(CG/LS) ===
6124 */
dbff57bc
AD
6125 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6126 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6e378858
EH
6127 } else {
6128 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6129 * === CGCG + CGLS ===
6130 */
dbff57bc
AD
6131 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6132 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6e378858
EH
6133 }
6134 return 0;
6135}
6136
a8ca3413
RZ
6137static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6138 enum amd_clockgating_state state)
6139{
8a19e7fa
RZ
6140 uint32_t msg_id, pp_state = 0;
6141 uint32_t pp_support_state = 0;
a8ca3413
RZ
6142 void *pp_handle = adev->powerplay.pp_handle;
6143
8a19e7fa
RZ
6144 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6145 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6146 pp_support_state = PP_STATE_SUPPORT_LS;
6147 pp_state = PP_STATE_LS;
6148 }
6149 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6150 pp_support_state |= PP_STATE_SUPPORT_CG;
6151 pp_state |= PP_STATE_CG;
6152 }
6153 if (state == AMD_CG_STATE_UNGATE)
6154 pp_state = 0;
6155
6156 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6157 PP_BLOCK_GFX_CG,
6158 pp_support_state,
6159 pp_state);
6160 amd_set_clockgating_by_smu(pp_handle, msg_id);
6161 }
a8ca3413 6162
8a19e7fa
RZ
6163 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6164 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6165 pp_support_state = PP_STATE_SUPPORT_LS;
6166 pp_state = PP_STATE_LS;
6167 }
a8ca3413 6168
8a19e7fa
RZ
6169 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6170 pp_support_state |= PP_STATE_SUPPORT_CG;
6171 pp_state |= PP_STATE_CG;
6172 }
6173
6174 if (state == AMD_CG_STATE_UNGATE)
6175 pp_state = 0;
6176
6177 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6178 PP_BLOCK_GFX_MG,
6179 pp_support_state,
6180 pp_state);
6181 amd_set_clockgating_by_smu(pp_handle, msg_id);
6182 }
a8ca3413
RZ
6183
6184 return 0;
6185}
6186
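/*
 * Sketch of one SMU clock-gating request, assuming PP_CG_MSG_ID packs its
 * four arguments into a single message word (the exact encoding lives in
 * the powerplay headers):
 *
 *	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, PP_BLOCK_GFX_CG,
 *			      PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
 *			      PP_STATE_CG | PP_STATE_LS);
 *	amd_set_clockgating_by_smu(pp_handle, msg_id);
 *
 * On AMD_CG_STATE_UNGATE the support bits are kept but pp_state is sent
 * as 0, which is how gfx_v8_0_tonga_update_gfx_clock_gating() above and
 * the Polaris variant below disable gating without dropping capability
 * information.
 */
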
6187static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6188 enum amd_clockgating_state state)
6189{
8a19e7fa
RZ
6190
6191 uint32_t msg_id, pp_state = 0;
6192 uint32_t pp_support_state = 0;
a8ca3413
RZ
6193 void *pp_handle = adev->powerplay.pp_handle;
6194
8a19e7fa
RZ
6195 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6196 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6197 pp_support_state = PP_STATE_SUPPORT_LS;
6198 pp_state = PP_STATE_LS;
6199 }
6200 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6201 pp_support_state |= PP_STATE_SUPPORT_CG;
6202 pp_state |= PP_STATE_CG;
6203 }
6204 if (state == AMD_CG_STATE_UNGATE)
6205 pp_state = 0;
6206
6207 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6208 PP_BLOCK_GFX_CG,
6209 pp_support_state,
6210 pp_state);
6211 amd_set_clockgating_by_smu(pp_handle, msg_id);
6212 }
a8ca3413 6213
8a19e7fa
RZ
6214 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6215 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6216 pp_support_state = PP_STATE_SUPPORT_LS;
6217 pp_state = PP_STATE_LS;
6218 }
6219 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6220 pp_support_state |= PP_STATE_SUPPORT_CG;
6221 pp_state |= PP_STATE_CG;
6222 }
6223 if (state == AMD_CG_STATE_UNGATE)
6224 pp_state = 0;
6225
6226 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6227 PP_BLOCK_GFX_3D,
6228 pp_support_state,
6229 pp_state);
6230 amd_set_clockgating_by_smu(pp_handle, msg_id);
6231 }
a8ca3413 6232
8a19e7fa
RZ
6233 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6234 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6235 pp_support_state = PP_STATE_SUPPORT_LS;
6236 pp_state = PP_STATE_LS;
6237 }
a8ca3413 6238
8a19e7fa
RZ
6239 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6240 pp_support_state |= PP_STATE_SUPPORT_CG;
6241 pp_state |= PP_STATE_CG;
6242 }
a8ca3413 6243
8a19e7fa
RZ
6244 if (state == AMD_CG_STATE_UNGATE)
6245 pp_state = 0;
a8ca3413 6246
8a19e7fa
RZ
6247 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6248 PP_BLOCK_GFX_MG,
6249 pp_support_state,
6250 pp_state);
6251 amd_set_clockgating_by_smu(pp_handle, msg_id);
6252 }
6253
6254 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6255 pp_support_state = PP_STATE_SUPPORT_LS;
6256
6257 if (state == AMD_CG_STATE_UNGATE)
6258 pp_state = 0;
6259 else
6260 pp_state = PP_STATE_LS;
6261
6262 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6263 PP_BLOCK_GFX_RLC,
6264 pp_support_state,
6265 pp_state);
6266 amd_set_clockgating_by_smu(pp_handle, msg_id);
6267 }
6268
6269 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6270 pp_support_state = PP_STATE_SUPPORT_LS;
6271
6272 if (state == AMD_CG_STATE_UNGATE)
6273 pp_state = 0;
6274 else
6275 pp_state = PP_STATE_LS;
6276 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
a8ca3413 6277 PP_BLOCK_GFX_CP,
8a19e7fa 6278 pp_support_state,
a8ca3413 6279 pp_state);
8a19e7fa
RZ
6280 amd_set_clockgating_by_smu(pp_handle, msg_id);
6281 }
a8ca3413
RZ
6282
6283 return 0;
6284}
6285
5fc3aeeb 6286static int gfx_v8_0_set_clockgating_state(void *handle,
6287 enum amd_clockgating_state state)
aaa36a97 6288{
6e378858
EH
6289 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6290
ce137c04
ML
6291 if (amdgpu_sriov_vf(adev))
6292 return 0;
6293
6e378858
EH
6294 switch (adev->asic_type) {
6295 case CHIP_FIJI:
dbff57bc
AD
6296 case CHIP_CARRIZO:
6297 case CHIP_STONEY:
6298 gfx_v8_0_update_gfx_clock_gating(adev,
7e913664 6299 state == AMD_CG_STATE_GATE);
6e378858 6300 break;
a8ca3413
RZ
6301 case CHIP_TONGA:
6302 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6303 break;
6304 case CHIP_POLARIS10:
6305 case CHIP_POLARIS11:
739e9fff 6306 case CHIP_POLARIS12:
a8ca3413
RZ
6307 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6308 break;
6e378858
EH
6309 default:
6310 break;
6311 }
aaa36a97
AD
6312 return 0;
6313}
6314
536fbf94 6315static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
aaa36a97 6316{
5003f278 6317 return ring->adev->wb.wb[ring->rptr_offs];
aaa36a97
AD
6318}
6319
536fbf94 6320static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
aaa36a97
AD
6321{
6322 struct amdgpu_device *adev = ring->adev;
aaa36a97
AD
6323
6324 if (ring->use_doorbell)
6325 /* XXX check if swapping is necessary on BE */
5003f278 6326 return ring->adev->wb.wb[ring->wptr_offs];
aaa36a97 6327 else
5003f278 6328 return RREG32(mmCP_RB0_WPTR);
aaa36a97
AD
6329}
6330
6331static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6332{
6333 struct amdgpu_device *adev = ring->adev;
6334
6335 if (ring->use_doorbell) {
6336 /* XXX check if swapping is necessary on BE */
536fbf94
KW
6337 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6338 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
aaa36a97 6339 } else {
536fbf94 6340 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
aaa36a97
AD
6341 (void)RREG32(mmCP_RB0_WPTR);
6342 }
6343}
6344
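/*
 * Two wptr paths: a doorbell write that the CP snoops directly, or a
 * plain MMIO write to CP_RB0_WPTR with a read-back to post the write (the
 * "(void)RREG32" above). A minimal sketch of the doorbell path:
 *
 *	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
 *	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
 */
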
d2edb07b 6345static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
aaa36a97
AD
6346{
6347 u32 ref_and_mask, reg_mem_engine;
6348
4e638ae9
XY
6349 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6350 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
aaa36a97
AD
6351 switch (ring->me) {
6352 case 1:
6353 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6354 break;
6355 case 2:
6356 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6357 break;
6358 default:
6359 return;
6360 }
6361 reg_mem_engine = 0;
6362 } else {
6363 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6364 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6365 }
6366
6367 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6368 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6369 WAIT_REG_MEM_FUNCTION(3) | /* == */
6370 reg_mem_engine));
6371 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6372 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6373 amdgpu_ring_write(ring, ref_and_mask);
6374 amdgpu_ring_write(ring, ref_and_mask);
6375 amdgpu_ring_write(ring, 0x20); /* poll interval */
6376}
6377
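/*
 * The WAIT_REG_MEM above is the "write, wait, write" form: the CP writes
 * ref_and_mask to GPU_HDP_FLUSH_REQ, then polls GPU_HDP_FLUSH_DONE (poll
 * interval 0x20) until the masked value equals ref_and_mask, so the HDP
 * flush is complete before later packets run. Compute/KIQ rings select
 * their per-pipe CP2/CP6 done bits; gfx uses CP0 and polls from the PFP.
 */
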
45682886
ML
6378static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6379{
6380 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6381 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6382 EVENT_INDEX(4));
6383
6384 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6385 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6386 EVENT_INDEX(0));
6387}
6388
6389
d35db561
CZ
6390static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6391{
6392 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6393 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6394 WRITE_DATA_DST_SEL(0) |
6395 WR_CONFIRM));
6396 amdgpu_ring_write(ring, mmHDP_DEBUG0);
6397 amdgpu_ring_write(ring, 0);
6398 amdgpu_ring_write(ring, 1);
6399
6400}
6401
93323131 6402static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
d88bf583
CK
6403 struct amdgpu_ib *ib,
6404 unsigned vm_id, bool ctx_switch)
aaa36a97
AD
6405{
6406 u32 header, control = 0;
aaa36a97 6407
de807f81 6408 if (ib->flags & AMDGPU_IB_FLAG_CE)
aaa36a97
AD
6409 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6410 else
6411 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6412
d88bf583 6413 control |= ib->length_dw | (vm_id << 24);
aaa36a97 6414
2e2e3c7f
ML
6415 if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT)
6416 control |= INDIRECT_BUFFER_PRE_ENB(1);
6417
aaa36a97
AD
6418 amdgpu_ring_write(ring, header);
6419 amdgpu_ring_write(ring,
6420#ifdef __BIG_ENDIAN
6421 (2 << 0) |
6422#endif
6423 (ib->gpu_addr & 0xFFFFFFFC));
6424 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6425 amdgpu_ring_write(ring, control);
6426}
6427
93323131 6428static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
d88bf583
CK
6429 struct amdgpu_ib *ib,
6430 unsigned vm_id, bool ctx_switch)
93323131 6431{
33b7ed01 6432 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
93323131 6433
33b7ed01 6434 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
93323131 6435 amdgpu_ring_write(ring,
6436#ifdef __BIG_ENDIAN
62d2ce4b 6437 (2 << 0) |
93323131 6438#endif
62d2ce4b 6439 (ib->gpu_addr & 0xFFFFFFFC));
93323131 6440 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6441 amdgpu_ring_write(ring, control);
6442}
6443
aaa36a97 6444static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
890ee23f 6445 u64 seq, unsigned flags)
aaa36a97 6446{
890ee23f
CZ
6447 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6448 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6449
aaa36a97
AD
6450 /* EVENT_WRITE_EOP - flush caches, send int */
6451 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6452 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6453 EOP_TC_ACTION_EN |
f84e63f2 6454 EOP_TC_WB_ACTION_EN |
aaa36a97
AD
6455 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6456 EVENT_INDEX(5)));
6457 amdgpu_ring_write(ring, addr & 0xfffffffc);
90bea0ab 6458 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
890ee23f 6459 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
aaa36a97
AD
6460 amdgpu_ring_write(ring, lower_32_bits(seq));
6461 amdgpu_ring_write(ring, upper_32_bits(seq));
22c01cc4 6462
aaa36a97
AD
6463}
6464
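/*
 * Fence sketch: EVENT_WRITE_EOP makes the CP write seq to addr once prior
 * work and the requested cache flushes retire. The AMDGPU_FENCE_FLAG_*
 * bits map onto the packet via DATA_SEL (64- vs 32-bit seq write) and
 * INT_SEL (raise an interrupt after the write).
 */
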
b8c7b39e 6465static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
aaa36a97 6466{
21cd942e 6467 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5907a0d8 6468 uint32_t seq = ring->fence_drv.sync_seq;
22c01cc4
AA
6469 uint64_t addr = ring->fence_drv.gpu_addr;
6470
6471 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6472 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
9cac5373
CZ
6473 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6474 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
22c01cc4
AA
6475 amdgpu_ring_write(ring, addr & 0xfffffffc);
6476 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6477 amdgpu_ring_write(ring, seq);
6478 amdgpu_ring_write(ring, 0xffffffff);
6479 amdgpu_ring_write(ring, 4); /* poll interval */
b8c7b39e
CK
6480}
6481
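/*
 * Pipeline sync sketch: stall this ring (in the PFP for gfx, in the ME
 * for compute) until the fence memory at fence_drv.gpu_addr reaches
 * fence_drv.sync_seq, i.e. until all previously submitted work on the
 * ring has signalled.
 */
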
6482static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6483 unsigned vm_id, uint64_t pd_addr)
6484{
21cd942e 6485 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5c3422b0 6486
aaa36a97
AD
6487 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6488 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
20a85ff8
CK
6489 WRITE_DATA_DST_SEL(0)) |
6490 WR_CONFIRM);
aaa36a97
AD
6491 if (vm_id < 8) {
6492 amdgpu_ring_write(ring,
6493 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6494 } else {
6495 amdgpu_ring_write(ring,
6496 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6497 }
6498 amdgpu_ring_write(ring, 0);
6499 amdgpu_ring_write(ring, pd_addr >> 12);
6500
aaa36a97
AD
6501 /* bits 0-15 are the VM contexts 0-15 */
6502 /* invalidate the cache */
6503 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6504 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6505 WRITE_DATA_DST_SEL(0)));
6506 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6507 amdgpu_ring_write(ring, 0);
6508 amdgpu_ring_write(ring, 1 << vm_id);
6509
6510 /* wait for the invalidate to complete */
6511 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6512 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6513 WAIT_REG_MEM_FUNCTION(0) | /* always */
6514 WAIT_REG_MEM_ENGINE(0))); /* me */
6515 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6516 amdgpu_ring_write(ring, 0);
6517 amdgpu_ring_write(ring, 0); /* ref */
6518 amdgpu_ring_write(ring, 0); /* mask */
6519 amdgpu_ring_write(ring, 0x20); /* poll interval */
6520
6521 /* compute doesn't have PFP */
6522 if (usepfp) {
6523 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6524 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6525 amdgpu_ring_write(ring, 0x0);
aaa36a97
AD
6526 }
6527}
6528
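/*
 * VM flush recipe used above: (1) write the new page directory base into
 * the per-VMID VM_CONTEXT*_PAGE_TABLE_BASE_ADDR register (VMIDs 0-7 and
 * 8-15 live in separate register banks), (2) write 1 << vm_id to
 * VM_INVALIDATE_REQUEST, (3) WAIT_REG_MEM on that register until the
 * invalidate is consumed, and (4) on gfx rings, PFP_SYNC_ME so the
 * prefetcher cannot run ahead with stale translations.
 */
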
536fbf94 6529static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
aaa36a97
AD
6530{
6531 return ring->adev->wb.wb[ring->wptr_offs];
6532}
6533
6534static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6535{
6536 struct amdgpu_device *adev = ring->adev;
6537
6538 /* XXX check if swapping is necessary on BE */
536fbf94
KW
6539 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6540 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
aaa36a97
AD
6541}
6542
6543static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6544 u64 addr, u64 seq,
890ee23f 6545 unsigned flags)
aaa36a97 6546{
890ee23f
CZ
6547 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6548 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6549
aaa36a97
AD
6550 /* RELEASE_MEM - flush caches, send int */
6551 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6552 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6553 EOP_TC_ACTION_EN |
a3d5aaa8 6554 EOP_TC_WB_ACTION_EN |
aaa36a97
AD
6555 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6556 EVENT_INDEX(5)));
890ee23f 6557 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
aaa36a97
AD
6558 amdgpu_ring_write(ring, addr & 0xfffffffc);
6559 amdgpu_ring_write(ring, upper_32_bits(addr));
6560 amdgpu_ring_write(ring, lower_32_bits(seq));
6561 amdgpu_ring_write(ring, upper_32_bits(seq));
6562}
6563
4e638ae9
XY
6564static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6565 u64 seq, unsigned int flags)
6566{
6567 /* we only allocate 32bit for each seq wb address */
f10b478d 6568 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4e638ae9
XY
6569
6570 /* write fence seq to the "addr" */
6571 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6572 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6573 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6574 amdgpu_ring_write(ring, lower_32_bits(addr));
6575 amdgpu_ring_write(ring, upper_32_bits(addr));
6576 amdgpu_ring_write(ring, lower_32_bits(seq));
6577
6578 if (flags & AMDGPU_FENCE_FLAG_INT) {
6579 /* set register to trigger INT */
6580 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6581 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6582 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6583 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6584 amdgpu_ring_write(ring, 0);
6585 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6586 }
6587}
6588
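/*
 * KIQ fences are 32-bit only (see the BUG_ON above): the seq is written
 * with a plain WRITE_DATA instead of an EOP event, and when an interrupt
 * is requested the packet pokes mmCPC_INT_STATUS with 0x20000000 (src_id
 * 178, per the comment) to trigger GENERIC2_INT handling in
 * gfx_v8_0_kiq_irq().
 */
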
c2167a65
ML
6589static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6590{
6591 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6592 amdgpu_ring_write(ring, 0);
6593}
6594
753ad49c
ML
6595static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6596{
6597 uint32_t dw2 = 0;
6598
c2ce92fc
ML
6599 if (amdgpu_sriov_vf(ring->adev))
6600 gfx_v8_0_ring_emit_ce_meta_init(ring,
6601 (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
6602
753ad49c
ML
6603 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6604 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
45682886 6605 gfx_v8_0_ring_emit_vgt_flush(ring);
753ad49c
ML
6606 /* set load_global_config & load_global_uconfig */
6607 dw2 |= 0x8001;
6608 /* set load_cs_sh_regs */
6609 dw2 |= 0x01000000;
6610 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6611 dw2 |= 0x10002;
6612
6613 /* set load_ce_ram if preamble presented */
6614 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6615 dw2 |= 0x10000000;
6616 } else {
6617 /* still load_ce_ram if this is the first time the preamble is presented,
6618 * even though no context switch happens.
6619 */
6620 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6621 dw2 |= 0x10000000;
6622 }
6623
6624 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6625 amdgpu_ring_write(ring, dw2);
6626 amdgpu_ring_write(ring, 0);
c2ce92fc
ML
6627
6628 if (amdgpu_sriov_vf(ring->adev))
6629 gfx_v8_0_ring_emit_de_meta_init(ring,
6630 (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
753ad49c
ML
6631}
6632
806ba2d4
ML
6633static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6634{
6635 unsigned ret;
6636
6637 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6638 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6639 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6640 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6641 ret = ring->wptr & ring->buf_mask;
6642 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6643 return ret;
6644}
6645
6646static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6647{
6648 unsigned cur;
6649
6650 BUG_ON(offset > ring->buf_mask);
6651 BUG_ON(ring->ring[offset] != 0x55aa55aa);
6652
6653 cur = (ring->wptr & ring->buf_mask) - 1;
6654 if (likely(cur > offset))
6655 ring->ring[offset] = cur - offset;
6656 else
6657 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6658}
6659
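/*
 * COND_EXEC patching sketch: init_cond_exec emits the packet with a dummy
 * DW count (0x55aa55aa) and returns its ring offset; once the skippable
 * span has been emitted, patch_cond_exec rewrites that slot with the real
 * number of DWs, handling ring wrap-around:
 *
 *	unsigned offset = gfx_v8_0_ring_emit_init_cond_exec(ring);
 *	... emit packets that may be skipped if *cond_exe_gpu_addr == 0 ...
 *	gfx_v8_0_ring_emit_patch_cond_exec(ring, offset);
 */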
6660
880e87e3
XY
6661static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6662{
6663 struct amdgpu_device *adev = ring->adev;
6664
6665 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6666 amdgpu_ring_write(ring, 0 | /* src: register*/
6667 (5 << 8) | /* dst: memory */
6668 (1 << 20)); /* write confirm */
6669 amdgpu_ring_write(ring, reg);
6670 amdgpu_ring_write(ring, 0);
6671 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6672 adev->virt.reg_val_offs * 4));
6673 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6674 adev->virt.reg_val_offs * 4));
6675}
6676
6677static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6678 uint32_t val)
6679{
6680 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6681 amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
6682 amdgpu_ring_write(ring, reg);
6683 amdgpu_ring_write(ring, 0);
6684 amdgpu_ring_write(ring, val);
6685}
6686
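/*
 * Ring-based register access, used through the KIQ (e.g. under SR-IOV):
 * emit_rreg COPY_DATAs a register into the writeback page at
 * virt.reg_val_offs, where the CPU can pick the value up; emit_wreg is a
 * plain WRITE_DATA to the register with address increment disabled.
 */
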
aaa36a97
AD
6687static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6688 enum amdgpu_interrupt_state state)
6689{
61cb8cef
TSD
6690 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6691 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6692}
6693
6694static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6695 int me, int pipe,
6696 enum amdgpu_interrupt_state state)
6697{
aaa36a97
AD
6698 /*
6699 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6700 * handles the setting of interrupts for this specific pipe. All other
6701 * pipes' interrupts are set by amdkfd.
6702 */
6703
6704 if (me == 1) {
6705 switch (pipe) {
6706 case 0:
aaa36a97
AD
6707 break;
6708 default:
6709 DRM_DEBUG("invalid pipe %d\n", pipe);
6710 return;
6711 }
6712 } else {
6713 DRM_DEBUG("invalid me %d\n", me);
6714 return;
6715 }
6716
61cb8cef
TSD
6717 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6718 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6719}
6720
6721static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6722 struct amdgpu_irq_src *source,
6723 unsigned type,
6724 enum amdgpu_interrupt_state state)
6725{
61cb8cef
TSD
6726 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6727 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6728
6729 return 0;
6730}
6731
6732static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6733 struct amdgpu_irq_src *source,
6734 unsigned type,
6735 enum amdgpu_interrupt_state state)
6736{
61cb8cef
TSD
6737 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6738 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6739
6740 return 0;
6741}
6742
6743static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6744 struct amdgpu_irq_src *src,
6745 unsigned type,
6746 enum amdgpu_interrupt_state state)
6747{
6748 switch (type) {
6749 case AMDGPU_CP_IRQ_GFX_EOP:
6750 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6751 break;
6752 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6753 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6754 break;
6755 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6756 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6757 break;
6758 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6759 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6760 break;
6761 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6762 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6763 break;
6764 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6765 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6766 break;
6767 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6768 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6769 break;
6770 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6771 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6772 break;
6773 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6774 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6775 break;
6776 default:
6777 break;
6778 }
6779 return 0;
6780}
6781
6782static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6783 struct amdgpu_irq_src *source,
6784 struct amdgpu_iv_entry *entry)
6785{
6786 int i;
6787 u8 me_id, pipe_id, queue_id;
6788 struct amdgpu_ring *ring;
6789
6790 DRM_DEBUG("IH: CP EOP\n");
6791 me_id = (entry->ring_id & 0x0c) >> 2;
6792 pipe_id = (entry->ring_id & 0x03) >> 0;
6793 queue_id = (entry->ring_id & 0x70) >> 4;
6794
6795 switch (me_id) {
6796 case 0:
6797 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6798 break;
6799 case 1:
6800 case 2:
6801 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6802 ring = &adev->gfx.compute_ring[i];
6803 /* Per-queue interrupt is supported for MEC starting from VI.
6804 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6805 */
6806 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6807 amdgpu_fence_process(ring);
6808 }
6809 break;
6810 }
6811 return 0;
6812}
6813
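/*
 * EOP routing in gfx_v8_0_eop_irq() above: me 0 is the gfx engine, so the
 * single gfx ring's fence is processed directly; MEC interrupts can only
 * be masked per pipe, so every compute ring is compared against the
 * decoded me/pipe/queue triple and the matching fence is processed.
 */
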
6814static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6815 struct amdgpu_irq_src *source,
6816 struct amdgpu_iv_entry *entry)
6817{
6818 DRM_ERROR("Illegal register access in command stream\n");
6819 schedule_work(&adev->reset_work);
6820 return 0;
6821}
6822
6823static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6824 struct amdgpu_irq_src *source,
6825 struct amdgpu_iv_entry *entry)
6826{
6827 DRM_ERROR("Illegal instruction in command stream\n");
6828 schedule_work(&adev->reset_work);
6829 return 0;
6830}
6831
4e638ae9
XY
6832static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6833 struct amdgpu_irq_src *src,
6834 unsigned int type,
6835 enum amdgpu_interrupt_state state)
6836{
07c397f9 6837 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
4e638ae9 6838
07c397f9 6839 BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
4e638ae9 6840
4e638ae9
XY
6841 switch (type) {
6842 case AMDGPU_CP_KIQ_IRQ_DRIVER0:
ccaf3574
TSD
6843 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
6844 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6845 if (ring->me == 1)
6846 WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
6847 ring->pipe,
6848 GENERIC2_INT_ENABLE,
6849 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6850 else
6851 WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
6852 ring->pipe,
6853 GENERIC2_INT_ENABLE,
6854 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
4e638ae9
XY
6855 break;
6856 default:
6857 BUG(); /* kiq only support GENERIC2_INT now */
6858 break;
6859 }
6860 return 0;
6861}
6862
6863static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6864 struct amdgpu_irq_src *source,
6865 struct amdgpu_iv_entry *entry)
6866{
6867 u8 me_id, pipe_id, queue_id;
07c397f9 6868 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
4e638ae9 6869
07c397f9 6870 BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
4e638ae9
XY
6871
6872 me_id = (entry->ring_id & 0x0c) >> 2;
6873 pipe_id = (entry->ring_id & 0x03) >> 0;
6874 queue_id = (entry->ring_id & 0x70) >> 4;
6875 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6876 me_id, pipe_id, queue_id);
6877
6878 amdgpu_fence_process(ring);
6879 return 0;
6880}
6881
a1255107 6882static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
88a907d6 6883 .name = "gfx_v8_0",
aaa36a97 6884 .early_init = gfx_v8_0_early_init,
ccba7691 6885 .late_init = gfx_v8_0_late_init,
aaa36a97
AD
6886 .sw_init = gfx_v8_0_sw_init,
6887 .sw_fini = gfx_v8_0_sw_fini,
6888 .hw_init = gfx_v8_0_hw_init,
6889 .hw_fini = gfx_v8_0_hw_fini,
6890 .suspend = gfx_v8_0_suspend,
6891 .resume = gfx_v8_0_resume,
6892 .is_idle = gfx_v8_0_is_idle,
6893 .wait_for_idle = gfx_v8_0_wait_for_idle,
3d7c6384 6894 .check_soft_reset = gfx_v8_0_check_soft_reset,
1057f20c 6895 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
aaa36a97 6896 .soft_reset = gfx_v8_0_soft_reset,
e4ae0fc3 6897 .post_soft_reset = gfx_v8_0_post_soft_reset,
aaa36a97
AD
6898 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6899 .set_powergating_state = gfx_v8_0_set_powergating_state,
ebd843d6 6900 .get_clockgating_state = gfx_v8_0_get_clockgating_state,
aaa36a97
AD
6901};
6902
6903static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
21cd942e 6904 .type = AMDGPU_RING_TYPE_GFX,
79887142
CK
6905 .align_mask = 0xff,
6906 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
536fbf94 6907 .support_64bit_ptrs = false,
e7706b42 6908 .get_rptr = gfx_v8_0_ring_get_rptr,
aaa36a97
AD
6909 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6910 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
e9d672b2
ML
6911 .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6912 5 + /* COND_EXEC */
6913 7 + /* PIPELINE_SYNC */
6914 19 + /* VM_FLUSH */
6915 8 + /* FENCE for VM_FLUSH */
6916 20 + /* GDS switch */
6917 4 + /* double SWITCH_BUFFER,
6918 the first COND_EXEC jump to the place just
6919 prior to this double SWITCH_BUFFER */
6920 5 + /* COND_EXEC */
6921 7 + /* HDP_flush */
6922 4 + /* VGT_flush */
6923 14 + /* CE_META */
6924 31 + /* DE_META */
6925 3 + /* CNTX_CTRL */
6926 5 + /* HDP_INVL */
6927 8 + 8 + /* FENCE x2 */
6928 2, /* SWITCH_BUFFER */
e12f3d7a 6929 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
93323131 6930 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
aaa36a97 6931 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
b8c7b39e 6932 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
aaa36a97
AD
6933 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6934 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
d2edb07b 6935 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
d35db561 6936 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
aaa36a97
AD
6937 .test_ring = gfx_v8_0_ring_test_ring,
6938 .test_ib = gfx_v8_0_ring_test_ib,
edff0e28 6939 .insert_nop = amdgpu_ring_insert_nop,
9e5d5309 6940 .pad_ib = amdgpu_ring_generic_pad_ib,
c2167a65 6941 .emit_switch_buffer = gfx_v8_ring_emit_sb,
753ad49c 6942 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
806ba2d4
ML
6943 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6944 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
aaa36a97
AD
6945};
6946
6947static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
21cd942e 6948 .type = AMDGPU_RING_TYPE_COMPUTE,
79887142
CK
6949 .align_mask = 0xff,
6950 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
536fbf94 6951 .support_64bit_ptrs = false,
e7706b42 6952 .get_rptr = gfx_v8_0_ring_get_rptr,
aaa36a97
AD
6953 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6954 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
e12f3d7a
CK
6955 .emit_frame_size =
6956 20 + /* gfx_v8_0_ring_emit_gds_switch */
6957 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6958 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6959 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6960 17 + /* gfx_v8_0_ring_emit_vm_flush */
6961 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6962 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
93323131 6963 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
aaa36a97 6964 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
b8c7b39e 6965 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
aaa36a97
AD
6966 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6967 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
35074d2d 6968 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
d35db561 6969 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
aaa36a97
AD
6970 .test_ring = gfx_v8_0_ring_test_ring,
6971 .test_ib = gfx_v8_0_ring_test_ib,
edff0e28 6972 .insert_nop = amdgpu_ring_insert_nop,
9e5d5309 6973 .pad_ib = amdgpu_ring_generic_pad_ib,
aaa36a97
AD
6974};
6975
4e638ae9
XY
6976static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6977 .type = AMDGPU_RING_TYPE_KIQ,
6978 .align_mask = 0xff,
6979 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
536fbf94 6980 .support_64bit_ptrs = false,
4e638ae9
XY
6981 .get_rptr = gfx_v8_0_ring_get_rptr,
6982 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6983 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6984 .emit_frame_size =
6985 20 + /* gfx_v8_0_ring_emit_gds_switch */
6986 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6987 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6988 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6989 17 + /* gfx_v8_0_ring_emit_vm_flush */
6990 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6991 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6992 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6993 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
4e638ae9
XY
6994 .test_ring = gfx_v8_0_ring_test_ring,
6995 .test_ib = gfx_v8_0_ring_test_ib,
6996 .insert_nop = amdgpu_ring_insert_nop,
6997 .pad_ib = amdgpu_ring_generic_pad_ib,
880e87e3
XY
6998 .emit_rreg = gfx_v8_0_ring_emit_rreg,
6999 .emit_wreg = gfx_v8_0_ring_emit_wreg,
4e638ae9
XY
7000};
7001
aaa36a97
AD
7002static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7003{
7004 int i;
7005
4e638ae9
XY
7006 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7007
aaa36a97
AD
7008 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7009 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7010
7011 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7012 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7013}
7014
7015static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7016 .set = gfx_v8_0_set_eop_interrupt_state,
7017 .process = gfx_v8_0_eop_irq,
7018};
7019
7020static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7021 .set = gfx_v8_0_set_priv_reg_fault_state,
7022 .process = gfx_v8_0_priv_reg_irq,
7023};
7024
7025static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7026 .set = gfx_v8_0_set_priv_inst_fault_state,
7027 .process = gfx_v8_0_priv_inst_irq,
7028};
7029
4e638ae9
XY
7030static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7031 .set = gfx_v8_0_kiq_set_interrupt_state,
7032 .process = gfx_v8_0_kiq_irq,
7033};
7034
aaa36a97
AD
7035static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7036{
7037 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7038 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7039
7040 adev->gfx.priv_reg_irq.num_types = 1;
7041 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7042
7043 adev->gfx.priv_inst_irq.num_types = 1;
7044 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
4e638ae9
XY
7045
7046 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7047 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
aaa36a97
AD
7048}
7049
dbff57bc
AD
7050static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7051{
ae6a58e4 7052 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
dbff57bc
AD
7053}
7054
aaa36a97
AD
7055static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7056{
7057 /* init ASIC GDS info */
7058 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7059 adev->gds.gws.total_size = 64;
7060 adev->gds.oa.total_size = 16;
7061
7062 if (adev->gds.mem.total_size == 64 * 1024) {
7063 adev->gds.mem.gfx_partition_size = 4096;
7064 adev->gds.mem.cs_partition_size = 4096;
7065
7066 adev->gds.gws.gfx_partition_size = 4;
7067 adev->gds.gws.cs_partition_size = 4;
7068
7069 adev->gds.oa.gfx_partition_size = 4;
7070 adev->gds.oa.cs_partition_size = 1;
7071 } else {
7072 adev->gds.mem.gfx_partition_size = 1024;
7073 adev->gds.mem.cs_partition_size = 1024;
7074
7075 adev->gds.gws.gfx_partition_size = 16;
7076 adev->gds.gws.cs_partition_size = 16;
7077
7078 adev->gds.oa.gfx_partition_size = 4;
7079 adev->gds.oa.cs_partition_size = 4;
7080 }
7081}
7082
9de06de8
NH
7083static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7084 u32 bitmap)
7085{
7086 u32 data;
7087
7088 if (!bitmap)
7089 return;
7090
7091 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7092 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7093
7094 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7095}
7096
8f8e00c1 7097static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
aaa36a97 7098{
8f8e00c1 7099 u32 data, mask;
aaa36a97 7100
5003f278
TSD
7101 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7102 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
aaa36a97 7103
6157bd7a 7104 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
aaa36a97 7105
5003f278 7106 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
aaa36a97
AD
7107}
7108
7dae69a2 7109static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
aaa36a97
AD
7110{
7111 int i, j, k, counter, active_cu_number = 0;
7112 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7dae69a2 7113 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
9de06de8 7114 unsigned disable_masks[4 * 2];
aaa36a97 7115
6157bd7a
FC
7116 memset(cu_info, 0, sizeof(*cu_info));
7117
9de06de8
NH
7118 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7119
aaa36a97
AD
7120 mutex_lock(&adev->grbm_idx_mutex);
7121 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7122 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7123 mask = 1;
7124 ao_bitmap = 0;
7125 counter = 0;
9559ef5b 7126 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
9de06de8
NH
7127 if (i < 4 && j < 2)
7128 gfx_v8_0_set_user_cu_inactive_bitmap(
7129 adev, disable_masks[i * 2 + j]);
8f8e00c1 7130 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
aaa36a97
AD
7131 cu_info->bitmap[i][j] = bitmap;
7132
8f8e00c1 7133 for (k = 0; k < 16; k ++) {
aaa36a97
AD
7134 if (bitmap & mask) {
7135 if (counter < 2)
7136 ao_bitmap |= mask;
7137 counter ++;
7138 }
7139 mask <<= 1;
7140 }
7141 active_cu_number += counter;
7142 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7143 }
7144 }
9559ef5b 7145 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
8f8e00c1 7146 mutex_unlock(&adev->grbm_idx_mutex);
aaa36a97
AD
7147
7148 cu_info->number = active_cu_number;
7149 cu_info->ao_cu_mask = ao_cu_mask;
aaa36a97 7150}
a1255107
AD
7151
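/*
 * CU accounting in gfx_v8_0_get_cu_info() above: per SH, the first two
 * active CUs found are marked "always on"; with 16 CU bits per SE and 8
 * per SH, the per-SH ao_bitmap is packed into ao_cu_mask at bit offset
 * (se * 16 + sh * 8). User-requested disables from
 * amdgpu_gfx_parse_disable_cu() are applied before sampling the bitmap.
 */
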
7152const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7153{
7154 .type = AMD_IP_BLOCK_TYPE_GFX,
7155 .major = 8,
7156 .minor = 0,
7157 .rev = 0,
7158 .funcs = &gfx_v8_0_ip_funcs,
7159};
7160
7161const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7162{
7163 .type = AMD_IP_BLOCK_TYPE_GFX,
7164 .major = 8,
7165 .minor = 1,
7166 .rev = 0,
7167 .funcs = &gfx_v8_0_ip_funcs,
7168};
acad2b2a
ML
7169
7170static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7171{
7172 uint64_t ce_payload_addr;
7173 int cnt_ce;
7174 static union {
49abb980
XY
7175 struct vi_ce_ib_state regular;
7176 struct vi_ce_ib_state_chained_ib chained;
e8411302 7177 } ce_payload = {};
acad2b2a
ML
7178
7179 if (ring->adev->virt.chained_ib_support) {
49abb980 7180 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
acad2b2a
ML
7181 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7182 } else {
49abb980 7183 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload);
acad2b2a
ML
7184 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7185 }
7186
7187 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7188 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7189 WRITE_DATA_DST_SEL(8) |
7190 WR_CONFIRM) |
7191 WRITE_DATA_CACHE_POLICY(0));
7192 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7193 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7194 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7195}
7196
7197static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7198{
7199 uint64_t de_payload_addr, gds_addr;
7200 int cnt_de;
7201 static union {
49abb980
XY
7202 struct vi_de_ib_state regular;
7203 struct vi_de_ib_state_chained_ib chained;
e8411302 7204 } de_payload = {};
acad2b2a
ML
7205
7206 gds_addr = csa_addr + 4096;
7207 if (ring->adev->virt.chained_ib_support) {
7208 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7209 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
49abb980 7210 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
acad2b2a
ML
7211 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7212 } else {
7213 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7214 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
49abb980 7215 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
acad2b2a
ML
7216 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7217 }
7218
7219 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7220 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7221 WRITE_DATA_DST_SEL(8) |
7222 WR_CONFIRM) |
7223 WRITE_DATA_CACHE_POLICY(0));
7224 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7225 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7226 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7227}
5ff98043
ML
7228
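/*
 * The CE/DE meta emitters above write preemption state into the per-VF
 * CSA under SR-IOV. Which payload layout is used follows the CP
 * firmware's chained-IB capability (virt.chained_ib_support); the cnt
 * value covers the WRITE_DATA body (control word, two address DWs and the
 * payload) per the PACKET3 count convention.
 */
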
7229/* create MQD for each compute queue */
0875a242 7230static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev)
5ff98043
ML
7231{
7232 struct amdgpu_ring *ring = NULL;
7233 int r, i;
7234
7235 /* create MQD for KIQ */
7236 ring = &adev->gfx.kiq.ring;
7237 if (!ring->mqd_obj) {
7238 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
b0ac2a32
AD
7239 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7240 &ring->mqd_gpu_addr, &ring->mqd_ptr);
5ff98043
ML
7241 if (r) {
7242 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
7243 return r;
7244 }
9b49c3ab
ML
7245
7246 /* prepare MQD backup */
7247 adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7248 if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
7249 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
5ff98043
ML
7250 }
7251
7252 /* create MQD for each KCQ */
b0ac2a32 7253 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5ff98043
ML
7254 ring = &adev->gfx.compute_ring[i];
7255 if (!ring->mqd_obj) {
7256 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
b0ac2a32
AD
7257 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7258 &ring->mqd_gpu_addr, &ring->mqd_ptr);
5ff98043
ML
7259 if (r) {
7260 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
7261 return r;
7262 }
9b49c3ab
ML
7263
7264 /* prepare MQD backup */
7265 adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7266 if (!adev->gfx.mec.mqd_backup[i])
7267 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
5ff98043
ML
7268 }
7269 }
7270
7271 return 0;
7272}
7273
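/*
 * MQDs (memory queue descriptors, struct vi_mqd) hold the hardware state
 * of each queue in a GTT BO the CP can fetch; the kmalloc'ed shadows
 * allocated by gfx_v8_0_compute_mqd_sw_init() above let the driver
 * restore queue state after reset. Backup slot AMDGPU_MAX_COMPUTE_RINGS
 * is reserved for the KIQ, slots [0..num_compute_rings) for the KCQs.
 */
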
0875a242 7274static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
5ff98043
ML
7275{
7276 struct amdgpu_ring *ring = NULL;
7277 int i;
7278
7279 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7280 ring = &adev->gfx.compute_ring[i];
24de7515 7281 kfree(adev->gfx.mec.mqd_backup[i]);
59a82d7d
XY
7282 amdgpu_bo_free_kernel(&ring->mqd_obj,
7283 &ring->mqd_gpu_addr,
7284 &ring->mqd_ptr);
5ff98043
ML
7285 }
7286
7287 ring = &adev->gfx.kiq.ring;
24de7515 7288 kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
59a82d7d
XY
7289 amdgpu_bo_free_kernel(&ring->mqd_obj,
7290 &ring->mqd_gpu_addr,
7291 &ring->mqd_ptr);
7292}