/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS 1
#define GFX8_MEC_HPD_SIZE 2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD 1
#define CLE_BPM_SERDES_CMD 0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength 14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");

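/* Per-VMID GDS register offsets: {base, size, GWS, OA} for each of the
 * sixteen VMIDs, indexed by VMID when per-process GDS state is programmed.
 */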
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

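/* The "golden" register tables below are triplets of {register, AND mask,
 * value}; amdgpu_device_program_register_sequence() applies each entry as
 * a read-modify-write (an all-ones mask simply overwrites the register).
 */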
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

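/* Apply the per-ASIC "golden" register settings at init time: the
 * clock-gating init table (where the ASIC has one), then the ASIC-specific
 * tuning values, then the settings common to the whole family.
 */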
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

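/* The CP scratch registers (SCRATCH_REG0..7) are handed out through a small
 * bitmap allocator; the ring and IB tests below each borrow one to verify
 * that the CP can write registers on our behalf.
 */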
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

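/* Basic ring sanity test: seed a scratch register with 0xCAFEDEAD, emit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, and poll until the
 * new value shows up or the usec timeout expires.
 */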
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

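/* Same check as the ring test above, but routed through an indirect buffer:
 * the scratch write is submitted as a 3-dword IB, and completion is observed
 * by waiting on the returned fence with a timeout.
 */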
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

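/* Drop all CP/RLC firmware references; Stoney and Topaz carry no MEC2
 * firmware, so mec2_fw is only released on the other ASICs.
 */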
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

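/* Fetch and validate all CP (PFP/ME/CE/MEC/MEC2) and RLC firmware images for
 * the current ASIC. On the Polaris parts the newer *_2.bin images are tried
 * first, falling back to the original names when they are absent; when
 * firmware is loaded via the SMU, the images are also registered in
 * adev->firmware.ucode[] so their sizes are accounted for.
 */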
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_VEGAM:
		chip_name = "vegam";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released with feature version #46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the CP jump table (JT) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

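/* Serialize the clear-state data (cs_data) into an RLC-consumable buffer:
 * a clear-state preamble, one SET_CONTEXT_REG run per extent, the raster
 * config of the first RB, and a trailing CLEAR_STATE packet.
 */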
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

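/* Copy the jump table (JT) of each CP microcode image (CE, PFP, ME, MEC,
 * and MEC2 on Carrizo) back to back into the shared RLC cp_table buffer.
 */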
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

1349static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1350{
1351 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1352 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
1353}
1354
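/*
 * Allocate the RLC buffers: a VRAM BO for the clear-state indirect buffer
 * (filled by gfx_v8_0_get_csb_buffer()) and, on Carrizo/Stoney, a BO
 * holding the CP jump tables plus GDS backup space ("JT + GDS" below).
 */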
1355static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1356{
1357 volatile u32 *dst_ptr;
1358 u32 dws;
1359 const struct cs_section_def *cs_data;
1360 int r;
1361
1362 adev->gfx.rlc.cs_data = vi_cs_data;
1363
1364 cs_data = adev->gfx.rlc.cs_data;
1365
1366 if (cs_data) {
1367 /* clear state block */
1368 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1369
1370 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1371 AMDGPU_GEM_DOMAIN_VRAM,
1372 &adev->gfx.rlc.clear_state_obj,
1373 &adev->gfx.rlc.clear_state_gpu_addr,
1374 (void **)&adev->gfx.rlc.cs_ptr);
2b6cd977 1375 if (r) {
a4a02777 1376 			dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
1377 gfx_v8_0_rlc_fini(adev);
1378 return r;
1379 }
1380
1381 /* set up the cs buffer */
1382 dst_ptr = adev->gfx.rlc.cs_ptr;
1383 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1384 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1385 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1386 }
1387
1388 if ((adev->asic_type == CHIP_CARRIZO) ||
1389 (adev->asic_type == CHIP_STONEY)) {
07cf1a0b 1390 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1391 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1392 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1393 &adev->gfx.rlc.cp_table_obj,
1394 &adev->gfx.rlc.cp_table_gpu_addr,
1395 (void **)&adev->gfx.rlc.cp_table_ptr);
fb16007b 1396 if (r) {
a4a02777 1397 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1398 return r;
1399 }
1400
1401 cz_init_cp_jump_table(adev);
1402
1403 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1404 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1405 }
1406
1407 return 0;
1408}
1409
1410static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1411{
078af1a3 1412 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1413}
1414
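/*
 * Allocate one GFX8_MEC_HPD_SIZE EOP slice per acquired compute ring in a
 * single zeroed GTT BO; each ring's eop_gpu_addr later points into this
 * buffer (see gfx_v8_0_compute_ring_init() below).
 */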
1415static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1416{
1417 int r;
1418 u32 *hpd;
42794b27 1419 size_t mec_hpd_size;
aaa36a97 1420
1421 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1422
78c16834 1423 /* take ownership of the relevant compute queues */
41f6a99a 1424 amdgpu_gfx_compute_queue_acquire(adev);
1425
1426 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
aaa36a97 1427
1428 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1429 AMDGPU_GEM_DOMAIN_GTT,
1430 &adev->gfx.mec.hpd_eop_obj,
1431 &adev->gfx.mec.hpd_eop_gpu_addr,
1432 (void **)&hpd);
aaa36a97 1433 if (r) {
a4a02777 1434 		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1435 return r;
1436 }
1437
42794b27 1438 memset(hpd, 0, mec_hpd_size);
1439
1440 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1441 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1442
1443 return 0;
1444}
1445
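/*
 * Hand-assembled GCN compute shaders for the Carrizo EDC workaround
 * below: one initializes a block of VGPRs, the other initializes SGPRs
 * and is dispatched twice with complementary CU masks (see the 0x0f/0xf0
 * mmCOMPUTE_STATIC_THREAD_MGMT_SE0 values in sgpr1/sgpr2_init_regs), so
 * that every GPR gets written once.
 */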
1446static const u32 vgpr_init_compute_shader[] =
1447{
1448 0x7e000209, 0x7e020208,
1449 0x7e040207, 0x7e060206,
1450 0x7e080205, 0x7e0a0204,
1451 0x7e0c0203, 0x7e0e0202,
1452 0x7e100201, 0x7e120200,
1453 0x7e140209, 0x7e160208,
1454 0x7e180207, 0x7e1a0206,
1455 0x7e1c0205, 0x7e1e0204,
1456 0x7e200203, 0x7e220202,
1457 0x7e240201, 0x7e260200,
1458 0x7e280209, 0x7e2a0208,
1459 0x7e2c0207, 0x7e2e0206,
1460 0x7e300205, 0x7e320204,
1461 0x7e340203, 0x7e360202,
1462 0x7e380201, 0x7e3a0200,
1463 0x7e3c0209, 0x7e3e0208,
1464 0x7e400207, 0x7e420206,
1465 0x7e440205, 0x7e460204,
1466 0x7e480203, 0x7e4a0202,
1467 0x7e4c0201, 0x7e4e0200,
1468 0x7e500209, 0x7e520208,
1469 0x7e540207, 0x7e560206,
1470 0x7e580205, 0x7e5a0204,
1471 0x7e5c0203, 0x7e5e0202,
1472 0x7e600201, 0x7e620200,
1473 0x7e640209, 0x7e660208,
1474 0x7e680207, 0x7e6a0206,
1475 0x7e6c0205, 0x7e6e0204,
1476 0x7e700203, 0x7e720202,
1477 0x7e740201, 0x7e760200,
1478 0x7e780209, 0x7e7a0208,
1479 0x7e7c0207, 0x7e7e0206,
1480 0xbf8a0000, 0xbf810000,
1481};
1482
1483static const u32 sgpr_init_compute_shader[] =
1484{
1485 0xbe8a0100, 0xbe8c0102,
1486 0xbe8e0104, 0xbe900106,
1487 0xbe920108, 0xbe940100,
1488 0xbe960102, 0xbe980104,
1489 0xbe9a0106, 0xbe9c0108,
1490 0xbe9e0100, 0xbea00102,
1491 0xbea20104, 0xbea40106,
1492 0xbea60108, 0xbea80100,
1493 0xbeaa0102, 0xbeac0104,
1494 0xbeae0106, 0xbeb00108,
1495 0xbeb20100, 0xbeb40102,
1496 0xbeb60104, 0xbeb80106,
1497 0xbeba0108, 0xbebc0100,
1498 0xbebe0102, 0xbec00104,
1499 0xbec20106, 0xbec40108,
1500 0xbec60100, 0xbec80102,
1501 0xbee60004, 0xbee70005,
1502 0xbeea0006, 0xbeeb0007,
1503 0xbee80008, 0xbee90009,
1504 0xbefc0000, 0xbf8a0000,
1505 0xbf810000, 0x00000000,
1506};
1507
1508static const u32 vgpr_init_regs[] =
1509{
1510 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
38610f15 1511 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1512 mmCOMPUTE_NUM_THREAD_X, 256*4,
1513 mmCOMPUTE_NUM_THREAD_Y, 1,
1514 mmCOMPUTE_NUM_THREAD_Z, 1,
38610f15 1515 mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1516 mmCOMPUTE_PGM_RSRC2, 20,
1517 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1518 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1519 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1520 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1521 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1522 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1523 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1524 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1525 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1526 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1527};
1528
1529static const u32 sgpr1_init_regs[] =
1530{
1531 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
38610f15 1532 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1533 mmCOMPUTE_NUM_THREAD_X, 256*5,
1534 mmCOMPUTE_NUM_THREAD_Y, 1,
1535 mmCOMPUTE_NUM_THREAD_Z, 1,
38610f15 1536 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1537 mmCOMPUTE_PGM_RSRC2, 20,
1538 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1539 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1540 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1541 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1542 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1543 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1544 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1545 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1546 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1547 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1548};
1549
1550static const u32 sgpr2_init_regs[] =
1551{
1552 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1553 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1554 mmCOMPUTE_NUM_THREAD_X, 256*5,
1555 mmCOMPUTE_NUM_THREAD_Y, 1,
1556 mmCOMPUTE_NUM_THREAD_Z, 1,
38610f15 1557 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1558 mmCOMPUTE_PGM_RSRC2, 20,
1559 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1560 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1561 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1562 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1563 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1564 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1565 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1566 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1567 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1568 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1569};
1570
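/*
 * SEC/DED (single-error-correct / double-error-detect) counter registers
 * across the CP, GDS, SPI, SQ and texture blocks; the workaround reads
 * them back at the end to clear the counts.
 */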
1571static const u32 sec_ded_counter_registers[] =
1572{
1573 mmCPC_EDC_ATC_CNT,
1574 mmCPC_EDC_SCRATCH_CNT,
1575 mmCPC_EDC_UCODE_CNT,
1576 mmCPF_EDC_ATC_CNT,
1577 mmCPF_EDC_ROQ_CNT,
1578 mmCPF_EDC_TAG_CNT,
1579 mmCPG_EDC_ATC_CNT,
1580 mmCPG_EDC_DMA_CNT,
1581 mmCPG_EDC_TAG_CNT,
1582 mmDC_EDC_CSINVOC_CNT,
1583 mmDC_EDC_RESTORE_CNT,
1584 mmDC_EDC_STATE_CNT,
1585 mmGDS_EDC_CNT,
1586 mmGDS_EDC_GRBM_CNT,
1587 mmGDS_EDC_OA_DED,
1588 mmSPI_EDC_CNT,
1589 mmSQC_ATC_EDC_GATCL1_CNT,
1590 mmSQC_EDC_CNT,
1591 mmSQ_EDC_DED_CNT,
1592 mmSQ_EDC_INFO,
1593 mmSQ_EDC_SEC_CNT,
1594 mmTCC_EDC_CNT,
1595 mmTCP_ATC_EDC_GATCL1_CNT,
1596 mmTCP_EDC_CNT,
1597 mmTD_EDC_CNT
1598};
1599
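/*
 * Carrizo-only EDC workaround: build a single IB that dispatches the
 * GPR-init shaders above (VGPR once, SGPR twice with complementary CU
 * masks), wait for it to complete, then enable DED_MODE/PROP_FED in
 * GB_EDC_MODE and clear the SEC/DED counters by reading them back.
 */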
1600static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1601{
1602 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1603 struct amdgpu_ib ib;
f54d1867 1604 struct dma_fence *f = NULL;
1605 int r, i;
1606 u32 tmp;
1607 unsigned total_size, vgpr_offset, sgpr_offset;
1608 u64 gpu_addr;
1609
1610 /* only supported on CZ */
1611 if (adev->asic_type != CHIP_CARRIZO)
1612 return 0;
1613
1614 /* bail if the compute ring is not ready */
1615 if (!ring->ready)
1616 return 0;
1617
1618 tmp = RREG32(mmGB_EDC_MODE);
1619 WREG32(mmGB_EDC_MODE, 0);
1620
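	/*
	 * IB size per init sequence: 3 dwords per SET_SH_REG register
	 * pair, 4 dwords for COMPUTE_PGM_LO/HI, 5 for DISPATCH_DIRECT and
	 * 2 for the CS-partial-flush EVENT_WRITE, times 4 bytes, followed
	 * by the two shader binaries at 256-byte alignment.
	 */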
1621 total_size =
1622 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1623 total_size +=
1624 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1625 total_size +=
1626 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1627 total_size = ALIGN(total_size, 256);
1628 vgpr_offset = total_size;
1629 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1630 sgpr_offset = total_size;
1631 total_size += sizeof(sgpr_init_compute_shader);
1632
1633 /* allocate an indirect buffer to put the commands in */
1634 memset(&ib, 0, sizeof(ib));
b07c60c0 1635 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1636 if (r) {
1637 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1638 return r;
1639 }
1640
1641 /* load the compute shaders */
1642 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1643 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1644
1645 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1646 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1647
1648 /* init the ib length to 0 */
1649 ib.length_dw = 0;
1650
1651 /* VGPR */
1652 /* write the register state for the compute dispatch */
1653 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1654 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1655 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1656 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1657 }
1658 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1659 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1660 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1661 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1662 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1663 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1664
1665 /* write dispatch packet */
1666 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1667 ib.ptr[ib.length_dw++] = 8; /* x */
1668 ib.ptr[ib.length_dw++] = 1; /* y */
1669 ib.ptr[ib.length_dw++] = 1; /* z */
1670 ib.ptr[ib.length_dw++] =
1671 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1672
1673 /* write CS partial flush packet */
1674 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1675 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1676
1677 /* SGPR1 */
1678 /* write the register state for the compute dispatch */
1679 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1680 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1681 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1682 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1683 }
1684 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1685 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1686 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1687 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1688 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1689 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1690
1691 /* write dispatch packet */
1692 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1693 ib.ptr[ib.length_dw++] = 8; /* x */
1694 ib.ptr[ib.length_dw++] = 1; /* y */
1695 ib.ptr[ib.length_dw++] = 1; /* z */
1696 ib.ptr[ib.length_dw++] =
1697 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1698
1699 /* write CS partial flush packet */
1700 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1701 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1702
1703 /* SGPR2 */
1704 /* write the register state for the compute dispatch */
1705 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1706 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1707 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1708 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1709 }
1710 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1711 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1712 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1713 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1714 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1715 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1716
1717 /* write dispatch packet */
1718 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1719 ib.ptr[ib.length_dw++] = 8; /* x */
1720 ib.ptr[ib.length_dw++] = 1; /* y */
1721 ib.ptr[ib.length_dw++] = 1; /* z */
1722 ib.ptr[ib.length_dw++] =
1723 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1724
1725 /* write CS partial flush packet */
1726 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1727 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1728
1729 	/* schedule the ib on the ring */
50ddc75e 1730 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1731 if (r) {
1732 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1733 goto fail;
1734 }
1735
1736 /* wait for the GPU to finish processing the IB */
f54d1867 1737 r = dma_fence_wait(f, false);
1738 if (r) {
1739 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1740 goto fail;
1741 }
1742
1743 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1744 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1745 WREG32(mmGB_EDC_MODE, tmp);
1746
1747 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1748 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1749 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1750
1751
1752 /* read back registers to clear the counters */
1753 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1754 RREG32(sec_ded_counter_registers[i]);
1755
1756fail:
cc55c45d 1757 amdgpu_ib_free(adev, &ib, NULL);
f54d1867 1758 dma_fence_put(f);
1759
1760 return r;
1761}
1762
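/*
 * Fill adev->gfx.config for the detected ASIC: shader engine / CU / RB
 * counts and FIFO sizes (queried from the vbios via
 * amdgpu_atombios_get_gfx_info() on Polaris/VEGAM, hardcoded otherwise),
 * the golden GB_ADDR_CONFIG, and the memory row size derived from the MC
 * DIMM address mapping (APU) or MC_ARB_RAMCFG (dGPU).
 */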
68182d90 1763static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1764{
1765 u32 gb_addr_config;
1766 u32 mc_shared_chmap, mc_arb_ramcfg;
1767 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1768 u32 tmp;
68182d90 1769 int ret;
1770
1771 switch (adev->asic_type) {
1772 case CHIP_TOPAZ:
1773 adev->gfx.config.max_shader_engines = 1;
1774 adev->gfx.config.max_tile_pipes = 2;
1775 adev->gfx.config.max_cu_per_sh = 6;
1776 adev->gfx.config.max_sh_per_se = 1;
1777 adev->gfx.config.max_backends_per_se = 2;
1778 adev->gfx.config.max_texture_channel_caches = 2;
1779 adev->gfx.config.max_gprs = 256;
1780 adev->gfx.config.max_gs_threads = 32;
1781 adev->gfx.config.max_hw_contexts = 8;
1782
1783 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1784 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1785 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1786 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1787 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1788 break;
1789 case CHIP_FIJI:
1790 adev->gfx.config.max_shader_engines = 4;
1791 adev->gfx.config.max_tile_pipes = 16;
1792 adev->gfx.config.max_cu_per_sh = 16;
1793 adev->gfx.config.max_sh_per_se = 1;
1794 adev->gfx.config.max_backends_per_se = 4;
5f2e816b 1795 adev->gfx.config.max_texture_channel_caches = 16;
1796 adev->gfx.config.max_gprs = 256;
1797 adev->gfx.config.max_gs_threads = 32;
1798 adev->gfx.config.max_hw_contexts = 8;
1799
1800 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1801 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1802 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1803 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1804 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1805 break;
2cc0c0b5 1806 case CHIP_POLARIS11:
c4642a47 1807 case CHIP_POLARIS12:
1808 ret = amdgpu_atombios_get_gfx_info(adev);
1809 if (ret)
1810 return ret;
1811 adev->gfx.config.max_gprs = 256;
1812 adev->gfx.config.max_gs_threads = 32;
1813 adev->gfx.config.max_hw_contexts = 8;
1814
1815 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1816 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1817 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1818 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2cc0c0b5 1819 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
68182d90 1820 break;
2cc0c0b5 1821 case CHIP_POLARIS10:
71765469 1822 case CHIP_VEGAM:
1823 ret = amdgpu_atombios_get_gfx_info(adev);
1824 if (ret)
1825 return ret;
1826 adev->gfx.config.max_gprs = 256;
1827 adev->gfx.config.max_gs_threads = 32;
1828 adev->gfx.config.max_hw_contexts = 8;
1829
1830 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1831 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1832 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1833 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1834 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1835 break;
1836 case CHIP_TONGA:
1837 adev->gfx.config.max_shader_engines = 4;
1838 adev->gfx.config.max_tile_pipes = 8;
1839 adev->gfx.config.max_cu_per_sh = 8;
1840 adev->gfx.config.max_sh_per_se = 1;
1841 adev->gfx.config.max_backends_per_se = 2;
1842 adev->gfx.config.max_texture_channel_caches = 8;
1843 adev->gfx.config.max_gprs = 256;
1844 adev->gfx.config.max_gs_threads = 32;
1845 adev->gfx.config.max_hw_contexts = 8;
1846
1847 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1848 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1849 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1850 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1851 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1852 break;
1853 case CHIP_CARRIZO:
1854 adev->gfx.config.max_shader_engines = 1;
1855 adev->gfx.config.max_tile_pipes = 2;
1856 adev->gfx.config.max_sh_per_se = 1;
1857 adev->gfx.config.max_backends_per_se = 2;
943c05bd 1858 adev->gfx.config.max_cu_per_sh = 8;
1859 adev->gfx.config.max_texture_channel_caches = 2;
1860 adev->gfx.config.max_gprs = 256;
1861 adev->gfx.config.max_gs_threads = 32;
1862 adev->gfx.config.max_hw_contexts = 8;
1863
1864 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1865 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1866 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1867 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1868 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1869 break;
1870 case CHIP_STONEY:
1871 adev->gfx.config.max_shader_engines = 1;
1872 adev->gfx.config.max_tile_pipes = 2;
1873 adev->gfx.config.max_sh_per_se = 1;
1874 adev->gfx.config.max_backends_per_se = 1;
943c05bd 1875 adev->gfx.config.max_cu_per_sh = 3;
1876 adev->gfx.config.max_texture_channel_caches = 2;
1877 adev->gfx.config.max_gprs = 256;
1878 adev->gfx.config.max_gs_threads = 16;
1879 adev->gfx.config.max_hw_contexts = 8;
1880
1881 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1882 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1883 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1884 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1885 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1886 break;
1887 default:
1888 adev->gfx.config.max_shader_engines = 2;
1889 adev->gfx.config.max_tile_pipes = 4;
1890 adev->gfx.config.max_cu_per_sh = 2;
1891 adev->gfx.config.max_sh_per_se = 1;
1892 adev->gfx.config.max_backends_per_se = 2;
1893 adev->gfx.config.max_texture_channel_caches = 4;
1894 adev->gfx.config.max_gprs = 256;
1895 adev->gfx.config.max_gs_threads = 32;
1896 adev->gfx.config.max_hw_contexts = 8;
1897
1898 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1899 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1900 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1901 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1902 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1903 break;
1904 }
1905
1906 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1907 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1908 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1909
1910 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1911 adev->gfx.config.mem_max_burst_length_bytes = 256;
1912 if (adev->flags & AMD_IS_APU) {
1913 /* Get memory bank mapping mode. */
1914 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1915 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1916 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1917
1918 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1919 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1920 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1921
1922 		/* Validate settings in case only one DIMM is installed. */
1923 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1924 dimm00_addr_map = 0;
1925 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1926 dimm01_addr_map = 0;
1927 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1928 dimm10_addr_map = 0;
1929 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1930 dimm11_addr_map = 0;
1931
1932 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1933 		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1934 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1935 adev->gfx.config.mem_row_size_in_kb = 2;
1936 else
1937 adev->gfx.config.mem_row_size_in_kb = 1;
1938 } else {
1939 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1940 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1941 if (adev->gfx.config.mem_row_size_in_kb > 4)
1942 adev->gfx.config.mem_row_size_in_kb = 4;
1943 }
1944
1945 adev->gfx.config.shader_engine_tile_size = 32;
1946 adev->gfx.config.num_gpus = 1;
1947 adev->gfx.config.multi_gpu_tile_size = 64;
1948
1949 /* fix up row size */
1950 switch (adev->gfx.config.mem_row_size_in_kb) {
1951 case 1:
1952 default:
1953 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1954 break;
1955 case 2:
1956 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1957 break;
1958 case 4:
1959 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1960 break;
1961 }
1962 adev->gfx.config.gb_addr_config = gb_addr_config;
1963
1964 return 0;
1965}
1966
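/*
 * Initialize one compute ring: MEC N is exposed as "me" N+1, each ring
 * gets its own doorbell and a GFX8_MEC_HPD_SIZE slice of the EOP BO, and
 * the EOP interrupt source is selected per (me, pipe).
 */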
1967static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1968 int mec, int pipe, int queue)
1969{
1970 int r;
1971 unsigned irq_type;
1972 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1973
1976 /* mec0 is me1 */
1977 ring->me = mec + 1;
1978 ring->pipe = pipe;
1979 ring->queue = queue;
1980
1981 ring->ring_obj = NULL;
1982 ring->use_doorbell = true;
1983 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1984 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1985 + (ring_id * GFX8_MEC_HPD_SIZE);
1986 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1987
1988 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1989 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1990 + ring->pipe;
1991
1992 /* type-2 packets are deprecated on MEC, use type-3 instead */
1993 r = amdgpu_ring_init(adev, ring, 1024,
1994 &adev->gfx.eop_irq, irq_type);
1995 if (r)
1996 return r;
1997
1998
1999 return 0;
2000}
2001
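/*
 * sw_init: register the KIQ/EOP/privileged-op interrupt sources, load
 * microcode, create the RLC, MEC, KIQ and MQD BOs, bring up the gfx ring
 * and the compute rings (allocated horizontally across pipes), and
 * reserve the GDS/GWS/OA partitions.
 */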
5fc3aeeb 2002static int gfx_v8_0_sw_init(void *handle)
aaa36a97 2003{
e33fec48 2004 int i, j, k, r, ring_id;
aaa36a97 2005 struct amdgpu_ring *ring;
4e638ae9 2006 struct amdgpu_kiq *kiq;
5fc3aeeb 2007 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97 2008
4853bbb6 2009 switch (adev->asic_type) {
4853bbb6 2010 case CHIP_TONGA:
2011 case CHIP_CARRIZO:
2012 case CHIP_FIJI:
2013 case CHIP_POLARIS10:
2014 case CHIP_POLARIS11:
2015 case CHIP_POLARIS12:
71765469 2016 case CHIP_VEGAM:
2017 adev->gfx.mec.num_mec = 2;
2018 break;
2019 case CHIP_TOPAZ:
2020 case CHIP_STONEY:
2021 default:
2022 adev->gfx.mec.num_mec = 1;
2023 break;
2024 }
2025
2026 adev->gfx.mec.num_pipe_per_mec = 4;
2027 adev->gfx.mec.num_queue_per_pipe = 8;
2028
4e638ae9 2029 /* KIQ event */
d766e6a3 2030 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
2031 if (r)
2032 return r;
2033
aaa36a97 2034 /* EOP Event */
d766e6a3 2035 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
2036 if (r)
2037 return r;
2038
2039 /* Privileged reg */
2040 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
2041 &adev->gfx.priv_reg_irq);
2042 if (r)
2043 return r;
2044
2045 /* Privileged inst */
2046 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
2047 &adev->gfx.priv_inst_irq);
2048 if (r)
2049 return r;
2050
2051 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2052
2053 gfx_v8_0_scratch_init(adev);
2054
2055 r = gfx_v8_0_init_microcode(adev);
2056 if (r) {
2057 DRM_ERROR("Failed to load gfx firmware!\n");
2058 return r;
2059 }
2060
2061 r = gfx_v8_0_rlc_init(adev);
2062 if (r) {
2063 DRM_ERROR("Failed to init rlc BOs!\n");
2064 return r;
2065 }
2066
2067 r = gfx_v8_0_mec_init(adev);
2068 if (r) {
2069 DRM_ERROR("Failed to init MEC BOs!\n");
2070 return r;
2071 }
2072
2073 /* set up the gfx ring */
2074 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2075 ring = &adev->gfx.gfx_ring[i];
2076 ring->ring_obj = NULL;
2077 sprintf(ring->name, "gfx");
2078 /* no gfx doorbells on iceland */
2079 if (adev->asic_type != CHIP_TOPAZ) {
2080 ring->use_doorbell = true;
2081 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2082 }
2083
2084 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2085 AMDGPU_CP_IRQ_GFX_EOP);
2086 if (r)
2087 return r;
2088 }
2089
aaa36a97 2090
2091 /* set up the compute queues - allocate horizontally across pipes */
2092 ring_id = 0;
2093 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2094 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2095 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2db0cdbe 2096 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
e33fec48 2097 continue;
78c16834 2098
2099 r = gfx_v8_0_compute_ring_init(adev,
2100 ring_id,
2101 i, k, j);
2102 if (r)
2103 return r;
78c16834 2104
2105 ring_id++;
2106 }
aaa36a97 2107 }
2108 }
2109
71c37505 2110 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2111 if (r) {
2112 DRM_ERROR("Failed to init KIQ BOs!\n");
2113 return r;
2114 }
596c67d0 2115
b4e40676 2116 kiq = &adev->gfx.kiq;
71c37505 2117 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2118 if (r)
2119 return r;
596c67d0 2120
b4e40676 2121 /* create MQD for all compute queues as well as KIQ for SRIOV case */
6b0fa871 2122 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2123 if (r)
2124 return r;
596c67d0 2125
aaa36a97 2126 /* reserve GDS, GWS and OA resource for gfx */
2127 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2128 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2129 &adev->gds.gds_gfx_bo, NULL, NULL);
2130 if (r)
2131 return r;
2132
2133 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2134 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2135 &adev->gds.gws_gfx_bo, NULL, NULL);
2136 if (r)
2137 return r;
2138
2139 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2140 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2141 &adev->gds.oa_gfx_bo, NULL, NULL);
2142 if (r)
2143 return r;
2144
2145 adev->gfx.ce_ram_size = 0x8000;
2146
2147 r = gfx_v8_0_gpu_early_init(adev);
2148 if (r)
2149 return r;
0bde3a95 2150
2151 return 0;
2152}
2153
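/* sw_fini: tear everything down in the reverse order of sw_init */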
5fc3aeeb 2154static int gfx_v8_0_sw_fini(void *handle)
2155{
2156 int i;
5fc3aeeb 2157 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97 2158
2159 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2160 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2161 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2162
2163 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2164 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2165 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2166 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2167
b9683c21 2168 amdgpu_gfx_compute_mqd_sw_fini(adev);
2169 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2170 amdgpu_gfx_kiq_fini(adev);
596c67d0 2171
aaa36a97 2172 gfx_v8_0_mec_fini(adev);
2b6cd977 2173 gfx_v8_0_rlc_fini(adev);
2174 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2175 &adev->gfx.rlc.clear_state_gpu_addr,
2176 (void **)&adev->gfx.rlc.cs_ptr);
2177 if ((adev->asic_type == CHIP_CARRIZO) ||
2178 (adev->asic_type == CHIP_STONEY)) {
2179 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2180 &adev->gfx.rlc.cp_table_gpu_addr,
2181 (void **)&adev->gfx.rlc.cp_table_ptr);
2182 }
13331ac3 2183 gfx_v8_0_free_microcode(adev);
2b6cd977 2184
2185 return 0;
2186}
2187
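/*
 * Program the GB_TILE_MODE0..31 and GB_MACROTILE_MODE0..15 registers with
 * per-ASIC tiling configurations; macrotile index 7 is skipped on all
 * parts, and tile indices 7/12/17/23 are additionally skipped on Topaz.
 */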
2188static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2189{
90bea0ab 2190 uint32_t *modearray, *mod2array;
2191 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2192 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
90bea0ab 2193 u32 reg_offset;
aaa36a97 2194
2195 modearray = adev->gfx.config.tile_mode_array;
2196 mod2array = adev->gfx.config.macrotile_mode_array;
2197
2198 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2199 modearray[reg_offset] = 0;
2200
2201 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2202 mod2array[reg_offset] = 0;
2203
2204 switch (adev->asic_type) {
2205 case CHIP_TOPAZ:
2206 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2207 PIPE_CONFIG(ADDR_SURF_P2) |
2208 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2209 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2210 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2211 PIPE_CONFIG(ADDR_SURF_P2) |
2212 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2213 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2214 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2215 PIPE_CONFIG(ADDR_SURF_P2) |
2216 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2217 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2218 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219 PIPE_CONFIG(ADDR_SURF_P2) |
2220 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2221 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2222 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2223 PIPE_CONFIG(ADDR_SURF_P2) |
2224 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2225 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2226 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2227 PIPE_CONFIG(ADDR_SURF_P2) |
2228 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2230 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2231 PIPE_CONFIG(ADDR_SURF_P2) |
2232 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2233 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2234 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2235 PIPE_CONFIG(ADDR_SURF_P2));
2236 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2237 PIPE_CONFIG(ADDR_SURF_P2) |
2238 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2239 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2240 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2241 PIPE_CONFIG(ADDR_SURF_P2) |
2242 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2243 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2244 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2245 PIPE_CONFIG(ADDR_SURF_P2) |
2246 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2247 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2248 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249 PIPE_CONFIG(ADDR_SURF_P2) |
2250 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2251 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2252 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2253 PIPE_CONFIG(ADDR_SURF_P2) |
2254 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2255 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2256 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2257 PIPE_CONFIG(ADDR_SURF_P2) |
2258 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2260 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2261 PIPE_CONFIG(ADDR_SURF_P2) |
2262 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2263 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2264 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2265 PIPE_CONFIG(ADDR_SURF_P2) |
2266 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2267 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2268 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2269 PIPE_CONFIG(ADDR_SURF_P2) |
2270 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2271 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2272 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2273 PIPE_CONFIG(ADDR_SURF_P2) |
2274 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2276 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2277 PIPE_CONFIG(ADDR_SURF_P2) |
2278 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2279 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2280 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2281 PIPE_CONFIG(ADDR_SURF_P2) |
2282 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2284 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2285 PIPE_CONFIG(ADDR_SURF_P2) |
2286 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2287 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2288 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2289 PIPE_CONFIG(ADDR_SURF_P2) |
2290 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2291 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2292 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2293 PIPE_CONFIG(ADDR_SURF_P2) |
2294 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2295 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2296 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2297 PIPE_CONFIG(ADDR_SURF_P2) |
2298 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2299 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2300 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2301 PIPE_CONFIG(ADDR_SURF_P2) |
2302 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2303 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2304 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2305 PIPE_CONFIG(ADDR_SURF_P2) |
2306 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2307 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2308
2309 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2310 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2311 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2312 NUM_BANKS(ADDR_SURF_8_BANK));
2313 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2314 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2315 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2316 NUM_BANKS(ADDR_SURF_8_BANK));
2317 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2318 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2319 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2320 NUM_BANKS(ADDR_SURF_8_BANK));
2321 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2323 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2324 NUM_BANKS(ADDR_SURF_8_BANK));
2325 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2326 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2327 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2328 NUM_BANKS(ADDR_SURF_8_BANK));
2329 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2330 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2331 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2332 NUM_BANKS(ADDR_SURF_8_BANK));
2333 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2334 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2335 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2336 NUM_BANKS(ADDR_SURF_8_BANK));
2337 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2338 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2339 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2340 NUM_BANKS(ADDR_SURF_16_BANK));
2341 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2342 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2343 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2344 NUM_BANKS(ADDR_SURF_16_BANK));
2345 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2346 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2348 NUM_BANKS(ADDR_SURF_16_BANK));
2349 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2352 NUM_BANKS(ADDR_SURF_16_BANK));
2353 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2354 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2355 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2356 NUM_BANKS(ADDR_SURF_16_BANK));
2357 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2360 NUM_BANKS(ADDR_SURF_16_BANK));
2361 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364 NUM_BANKS(ADDR_SURF_8_BANK));
2365
2366 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2367 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2368 reg_offset != 23)
2369 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2370
2371 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2372 if (reg_offset != 7)
2373 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2374
8cdacf44 2375 break;
af15a2d5 2376 case CHIP_FIJI:
71765469 2377 case CHIP_VEGAM:
2378 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2381 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2382 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2385 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2386 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2389 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2390 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2391 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2393 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2394 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2395 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2397 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2398 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2399 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2401 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2402 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2403 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2405 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2406 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2407 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2408 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2409 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2410 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2411 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2412 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2415 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2421 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2424 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2426 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2428 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2429 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2435 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2437 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2439 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2442 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2443 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2444 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2445 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2446 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2447 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2448 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2449 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2451 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2452 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2453 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2455 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2456 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2457 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2459 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2460 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2461 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2463 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2464 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2465 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2466 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2468 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2469 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2470 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2472 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2473 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2474 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2476 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2477 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2478 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2480 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2481 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2482 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2484 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2485 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2486 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2488 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2489 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2490 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2493 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2494 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2496 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2497 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2498 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2499 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2500
2501 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2503 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2504 NUM_BANKS(ADDR_SURF_8_BANK));
2505 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2507 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2508 NUM_BANKS(ADDR_SURF_8_BANK));
2509 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2511 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2512 NUM_BANKS(ADDR_SURF_8_BANK));
2513 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2515 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2516 NUM_BANKS(ADDR_SURF_8_BANK));
2517 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2519 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2520 NUM_BANKS(ADDR_SURF_8_BANK));
2521 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524 NUM_BANKS(ADDR_SURF_8_BANK));
2525 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2527 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2528 NUM_BANKS(ADDR_SURF_8_BANK));
2529 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2531 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2532 NUM_BANKS(ADDR_SURF_8_BANK));
2533 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2535 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2536 NUM_BANKS(ADDR_SURF_8_BANK));
2537 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2539 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2540 NUM_BANKS(ADDR_SURF_8_BANK));
2541 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2542 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2543 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2544 NUM_BANKS(ADDR_SURF_8_BANK));
2545 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2547 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548 NUM_BANKS(ADDR_SURF_8_BANK));
2549 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2551 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2552 NUM_BANKS(ADDR_SURF_8_BANK));
2553 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2555 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2556 NUM_BANKS(ADDR_SURF_4_BANK));
2557
2558 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2559 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2560
2561 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2562 if (reg_offset != 7)
2563 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2564
5f2e816b 2565 break;
aaa36a97 2566 case CHIP_TONGA:
2567 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2568 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2570 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2571 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2573 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2574 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2575 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2578 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2579 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2582 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2583 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2585 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2586 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2587 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2588 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2590 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2591 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2592 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2594 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2595 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2596 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2597 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2598 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2599 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2600 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2601 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2602 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2604 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2605 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2608 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2609 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2610 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2612 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2613 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2614 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2615 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2616 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2617 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2618 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2620 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2624 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2625 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2626 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2627 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2628 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2629 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2630 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2632 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2633 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2634 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2636 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2637 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2638 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2639 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2640 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2641 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2642 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2644 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2645 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2646 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2647 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2648 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2649 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2650 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2651 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2652 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2653 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2654 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2656 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2657 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2658 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2660 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2661 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2662 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2663 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2664 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2665 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2666 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2667 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2668 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2669 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2670 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2671 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2672 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2673 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2674 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2675 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2677 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2680 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2681 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2682 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2683 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2684 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2685 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2686 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2688 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2689
2690 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2693 NUM_BANKS(ADDR_SURF_16_BANK));
2694 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2695 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2696 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2697 NUM_BANKS(ADDR_SURF_16_BANK));
2698 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2699 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2700 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2701 NUM_BANKS(ADDR_SURF_16_BANK));
2702 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2704 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2705 NUM_BANKS(ADDR_SURF_16_BANK));
2706 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2707 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2708 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2709 NUM_BANKS(ADDR_SURF_16_BANK));
2710 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2712 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2713 NUM_BANKS(ADDR_SURF_16_BANK));
2714 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2717 NUM_BANKS(ADDR_SURF_16_BANK));
2718 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2720 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2721 NUM_BANKS(ADDR_SURF_16_BANK));
2722 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2724 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2725 NUM_BANKS(ADDR_SURF_16_BANK));
2726 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2728 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2729 NUM_BANKS(ADDR_SURF_16_BANK));
2730 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2732 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2733 NUM_BANKS(ADDR_SURF_16_BANK));
2734 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2736 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2737 NUM_BANKS(ADDR_SURF_8_BANK));
2738 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2741 NUM_BANKS(ADDR_SURF_4_BANK));
2742 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2743 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2744 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2745 NUM_BANKS(ADDR_SURF_4_BANK));
2746
2747 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2748 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2749
2750 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2751 if (reg_offset != 7)
2752 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2753
68182d90 2754 break;
2cc0c0b5 2755 case CHIP_POLARIS11:
c4642a47 2756 case CHIP_POLARIS12:
2757 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2758 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2759 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2760 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2761 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2764 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2765 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2767 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2768 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2769 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2772 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2773 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2776 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2777 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2778 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2779 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2780 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2781 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2782 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2784 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2785 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2786 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2788 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2789 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2790 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2791 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2792 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2798 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2799 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2800 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2802 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2803 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2806 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2807 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2808 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2810 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2814 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2816 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2818 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2819 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2822 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2823 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2824 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2826 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2827 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2828 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2829 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2830 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2831 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2832 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2834 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2835 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2836 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2837 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2838 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2839 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2840 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2841 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2842 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2843 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2844 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2846 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2847 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2848 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2849 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2850 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2851 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2852 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2854 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2855 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2856 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2859 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2860 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2863 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2864 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2866 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2868 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2869 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2870 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2871 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2872 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2873 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2875 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2876 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2877 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2878 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2879
2880 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2882 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2883 NUM_BANKS(ADDR_SURF_16_BANK));
2884
2885 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2886 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2887 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2888 NUM_BANKS(ADDR_SURF_16_BANK));
2889
2890 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2891 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2892 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2893 NUM_BANKS(ADDR_SURF_16_BANK));
2894
2895 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2896 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2897 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2898 NUM_BANKS(ADDR_SURF_16_BANK));
2899
2900 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2901 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2902 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2903 NUM_BANKS(ADDR_SURF_16_BANK));
2904
2905 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2906 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2907 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2908 NUM_BANKS(ADDR_SURF_16_BANK));
2909
2910 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2911 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2912 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2913 NUM_BANKS(ADDR_SURF_16_BANK));
2914
2915 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2916 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2917 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2918 NUM_BANKS(ADDR_SURF_16_BANK));
2919
2920 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2921 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2922 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923 NUM_BANKS(ADDR_SURF_16_BANK));
2924
2925 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2926 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2927 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2928 NUM_BANKS(ADDR_SURF_16_BANK));
2929
2930 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2931 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2932 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2933 NUM_BANKS(ADDR_SURF_16_BANK));
2934
2935 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2936 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2937 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2938 NUM_BANKS(ADDR_SURF_16_BANK));
2939
2940 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2941 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2942 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2943 NUM_BANKS(ADDR_SURF_8_BANK));
2944
2945 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2946 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2947 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2948 NUM_BANKS(ADDR_SURF_4_BANK));
2949
2950 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2951 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2952
2953 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2954 if (reg_offset != 7)
2955 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2956
2957 break;
2cc0c0b5 2958 case CHIP_POLARIS10:
2959 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2960 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2962 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2963 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2964 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2966 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2967 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2968 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2969 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2970 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2971 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2972 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2973 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2974 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2975 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2976 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2977 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2978 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2979 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2980 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2982 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2983 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2984 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2986 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2987 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2988 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2989 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2990 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2991 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2992 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2993 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2994 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2996 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3000 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3001 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3004 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3005 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3006 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3007 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3008 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3009 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3010 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3012 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3013 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3014 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3016 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3018 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3020 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3021 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3023 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3024 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3025 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3026 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3027 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3028 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3029 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3030 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3031 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3032 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3033 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3034 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3035 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3036 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3037 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3038 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3039 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3040 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3041 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3042 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3043 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3044 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3045 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3046 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3047 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3049 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3050 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3051 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3052 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3053 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3054 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3055 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3057 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3058 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3059 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3060 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3061 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3062 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3063 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3064 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3065 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3066 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3067 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3068 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3070 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3071 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3073 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3074 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3075 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3077 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3078 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3079 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3080 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3081
3082 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3083 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3084 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3085 NUM_BANKS(ADDR_SURF_16_BANK));
3086
3087 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3089 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3090 NUM_BANKS(ADDR_SURF_16_BANK));
3091
3092 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3093 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3094 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3095 NUM_BANKS(ADDR_SURF_16_BANK));
3096
3097 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3098 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3099 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3100 NUM_BANKS(ADDR_SURF_16_BANK));
3101
3102 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3103 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3104 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3105 NUM_BANKS(ADDR_SURF_16_BANK));
3106
3107 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3108 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3109 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3110 NUM_BANKS(ADDR_SURF_16_BANK));
3111
3112 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3113 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3114 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3115 NUM_BANKS(ADDR_SURF_16_BANK));
3116
3117 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3119 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3120 NUM_BANKS(ADDR_SURF_16_BANK));
3121
3122 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3123 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3124 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3125 NUM_BANKS(ADDR_SURF_16_BANK));
3126
3127 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3128 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3129 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3130 NUM_BANKS(ADDR_SURF_16_BANK));
3131
3132 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3133 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3134 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3135 NUM_BANKS(ADDR_SURF_16_BANK));
3136
3137 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3138 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3139 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3140 NUM_BANKS(ADDR_SURF_8_BANK));
3141
3142 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3143 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3144 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3145 NUM_BANKS(ADDR_SURF_4_BANK));
3146
3147 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3150 NUM_BANKS(ADDR_SURF_4_BANK));
3151
3152 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3153 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3154
3155 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3156 if (reg_offset != 7)
3157 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3158
aaa36a97 3159 break;
e3c7656c 3160 case CHIP_STONEY:
3161 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3162 PIPE_CONFIG(ADDR_SURF_P2) |
3163 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3164 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3165 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3166 PIPE_CONFIG(ADDR_SURF_P2) |
3167 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3168 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3169 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3170 PIPE_CONFIG(ADDR_SURF_P2) |
3171 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3172 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3173 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3174 PIPE_CONFIG(ADDR_SURF_P2) |
3175 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3176 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3177 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3178 PIPE_CONFIG(ADDR_SURF_P2) |
3179 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3180 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3181 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3182 PIPE_CONFIG(ADDR_SURF_P2) |
3183 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3184 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3185 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3186 PIPE_CONFIG(ADDR_SURF_P2) |
3187 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3188 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3189 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3190 PIPE_CONFIG(ADDR_SURF_P2));
3191 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3192 PIPE_CONFIG(ADDR_SURF_P2) |
3193 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3194 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3195 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3196 PIPE_CONFIG(ADDR_SURF_P2) |
3197 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3198 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3199 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3200 PIPE_CONFIG(ADDR_SURF_P2) |
3201 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3202 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3203 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3204 PIPE_CONFIG(ADDR_SURF_P2) |
3205 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3206 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3207 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3208 PIPE_CONFIG(ADDR_SURF_P2) |
3209 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3210 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3211 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3212 PIPE_CONFIG(ADDR_SURF_P2) |
3213 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3214 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3215 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3216 PIPE_CONFIG(ADDR_SURF_P2) |
3217 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3219 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3220 PIPE_CONFIG(ADDR_SURF_P2) |
3221 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3222 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3223 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3224 PIPE_CONFIG(ADDR_SURF_P2) |
3225 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3226 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3227 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3228 PIPE_CONFIG(ADDR_SURF_P2) |
3229 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3231 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3232 PIPE_CONFIG(ADDR_SURF_P2) |
3233 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3234 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3235 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3236 PIPE_CONFIG(ADDR_SURF_P2) |
3237 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3238 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3239 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3240 PIPE_CONFIG(ADDR_SURF_P2) |
3241 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3242 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3243 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3244 PIPE_CONFIG(ADDR_SURF_P2) |
3245 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3247 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3248 PIPE_CONFIG(ADDR_SURF_P2) |
3249 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3251 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3252 PIPE_CONFIG(ADDR_SURF_P2) |
3253 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3254 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3255 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3256 PIPE_CONFIG(ADDR_SURF_P2) |
3257 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3259 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3260 PIPE_CONFIG(ADDR_SURF_P2) |
3261 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3263
3264 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3265 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3266 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3267 NUM_BANKS(ADDR_SURF_8_BANK));
3268 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3269 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3270 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3271 NUM_BANKS(ADDR_SURF_8_BANK));
3272 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3275 NUM_BANKS(ADDR_SURF_8_BANK));
3276 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3277 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3278 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3279 NUM_BANKS(ADDR_SURF_8_BANK));
3280 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3281 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3282 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3283 NUM_BANKS(ADDR_SURF_8_BANK));
3284 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3285 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3286 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3287 NUM_BANKS(ADDR_SURF_8_BANK));
3288 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3289 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3290 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3291 NUM_BANKS(ADDR_SURF_8_BANK));
3292 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3293 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3294 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3295 NUM_BANKS(ADDR_SURF_16_BANK));
3296 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3297 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3298 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3299 NUM_BANKS(ADDR_SURF_16_BANK));
3300 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3301 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3302 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303 NUM_BANKS(ADDR_SURF_16_BANK));
3304 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3306 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 NUM_BANKS(ADDR_SURF_16_BANK));
3308 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3309 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3310 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3311 NUM_BANKS(ADDR_SURF_16_BANK));
3312 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3313 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3314 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3315 NUM_BANKS(ADDR_SURF_16_BANK));
3316 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3318 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3319 NUM_BANKS(ADDR_SURF_8_BANK));
3320
3321 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3322 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3323 reg_offset != 23)
3324 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3325
3326 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3327 if (reg_offset != 7)
3328 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3329
e3c7656c 3330 break;
aaa36a97 3331 default:
3332 dev_warn(adev->dev,
3333 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3334 adev->asic_type);
3335
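		/* fall through - unknown chips get the CHIP_CARRIZO tables */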
3336 case CHIP_CARRIZO:
3337 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3338 PIPE_CONFIG(ADDR_SURF_P2) |
3339 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3340 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3341 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3342 PIPE_CONFIG(ADDR_SURF_P2) |
3343 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3344 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3345 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3346 PIPE_CONFIG(ADDR_SURF_P2) |
3347 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3348 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3349 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3350 PIPE_CONFIG(ADDR_SURF_P2) |
3351 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3352 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3353 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3354 PIPE_CONFIG(ADDR_SURF_P2) |
3355 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3356 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3357 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3358 PIPE_CONFIG(ADDR_SURF_P2) |
3359 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3360 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3361 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3362 PIPE_CONFIG(ADDR_SURF_P2) |
3363 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3364 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3365 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3366 PIPE_CONFIG(ADDR_SURF_P2));
3367 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3368 PIPE_CONFIG(ADDR_SURF_P2) |
3369 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3370 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3371 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3372 PIPE_CONFIG(ADDR_SURF_P2) |
3373 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3374 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3375 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3376 PIPE_CONFIG(ADDR_SURF_P2) |
3377 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3379 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3380 PIPE_CONFIG(ADDR_SURF_P2) |
3381 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3382 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3383 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3384 PIPE_CONFIG(ADDR_SURF_P2) |
3385 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3386 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3387 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3388 PIPE_CONFIG(ADDR_SURF_P2) |
3389 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3390 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3391 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3392 PIPE_CONFIG(ADDR_SURF_P2) |
3393 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3394 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3395 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3396 PIPE_CONFIG(ADDR_SURF_P2) |
3397 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3398 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3399 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3400 PIPE_CONFIG(ADDR_SURF_P2) |
3401 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3402 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3403 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3404 PIPE_CONFIG(ADDR_SURF_P2) |
3405 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3406 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3407 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3408 PIPE_CONFIG(ADDR_SURF_P2) |
3409 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3410 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3411 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3412 PIPE_CONFIG(ADDR_SURF_P2) |
3413 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3414 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3415 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3416 PIPE_CONFIG(ADDR_SURF_P2) |
3417 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3418 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3419 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3420 PIPE_CONFIG(ADDR_SURF_P2) |
3421 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3422 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3423 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3424 PIPE_CONFIG(ADDR_SURF_P2) |
3425 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3427 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3428 PIPE_CONFIG(ADDR_SURF_P2) |
3429 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3431 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3432 PIPE_CONFIG(ADDR_SURF_P2) |
3433 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3435 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3436 PIPE_CONFIG(ADDR_SURF_P2) |
3437 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3439
3440 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3441 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3442 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3443 NUM_BANKS(ADDR_SURF_8_BANK));
3444 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3445 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3446 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3447 NUM_BANKS(ADDR_SURF_8_BANK));
3448 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3449 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3450 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3451 NUM_BANKS(ADDR_SURF_8_BANK));
3452 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3455 NUM_BANKS(ADDR_SURF_8_BANK));
3456 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3459 NUM_BANKS(ADDR_SURF_8_BANK));
3460 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3461 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3462 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3463 NUM_BANKS(ADDR_SURF_8_BANK));
3464 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3467 NUM_BANKS(ADDR_SURF_8_BANK));
3468 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3469 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3470 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3471 NUM_BANKS(ADDR_SURF_16_BANK));
3472 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3473 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3474 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3475 NUM_BANKS(ADDR_SURF_16_BANK));
3476 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3479 NUM_BANKS(ADDR_SURF_16_BANK));
3480 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3481 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3482 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3483 NUM_BANKS(ADDR_SURF_16_BANK));
3484 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3485 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3486 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3487 NUM_BANKS(ADDR_SURF_16_BANK));
3488 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3489 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3490 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3491 NUM_BANKS(ADDR_SURF_16_BANK));
3492 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3493 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3494 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3495 NUM_BANKS(ADDR_SURF_8_BANK));
3496
3497 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3498 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3499 reg_offset != 23)
3500 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3501
3502 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3503 if (reg_offset != 7)
3504 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3505
3506 break;
3507 }
3508}
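/*
 * Note on the write-out loops above (editorial, inferred from the code):
 * the GB_TILE_MODEn and GB_MACROTILE_MODEn registers sit at consecutive
 * offsets, so a single base register plus reg_offset covers each whole
 * table; the skipped indices (7 everywhere, plus 12/17/23 on the 2-pipe
 * APU parts) are simply never written.
 */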
3509
05fb7291 3510static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
9559ef5b 3511 u32 se_num, u32 sh_num, u32 instance)
aaa36a97 3512{
3513 u32 data;
3514
3515 if (instance == 0xffffffff)
3516 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3517 else
3518 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
aaa36a97 3519
5003f278 3520 if (se_num == 0xffffffff)
aaa36a97 3521 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
5003f278 3522 else
aaa36a97 3523 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3524
3525 if (sh_num == 0xffffffff)
3526 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3527 else
aaa36a97 3528 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
5003f278 3529
3530 WREG32(mmGRBM_GFX_INDEX, data);
3531}
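/*
 * Usage sketch (illustrative only, not part of the driver): callers pick
 * a single SE/SH instance for a per-instance register access and then
 * restore broadcast mode, all under grbm_idx_mutex - the same pattern
 * gfx_v8_0_setup_rb() uses further down.
 */
static u32 example_read_one_rb_backend(struct amdgpu_device *adev,
				       u32 se, u32 sh)
{
	u32 data;

	mutex_lock(&adev->grbm_idx_mutex);
	gfx_v8_0_select_se_sh(adev, se, sh, 0xffffffff);
	data = RREG32(mmCC_RB_BACKEND_DISABLE);
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	return data;
}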
3532
3533static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3534 u32 me, u32 pipe, u32 q)
3535{
3536 vi_srbm_select(adev, me, pipe, q, 0);
3537}
3538
8f8e00c1 3539static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3540{
3541 u32 data, mask;
3542
3543 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3544 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
aaa36a97 3545
5003f278 3546 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
aaa36a97 3547
3548 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3549 adev->gfx.config.max_sh_per_se);
aaa36a97 3550
8f8e00c1 3551 return (~data) & mask;
3552}
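/*
 * Sketch of the bitmask math above (amdgpu_gfx_create_bitmask() is
 * assumed to return a low mask of the requested width): invert the
 * per-SH disable bits and keep only as many bits as there are RBs
 * per SH.
 */
static u32 example_active_rb_bits(u32 backend_disable, u32 rbs_per_sh)
{
	u32 mask = (1u << rbs_per_sh) - 1;	/* e.g. 2 RBs -> 0x3 */

	return (~backend_disable) & mask;	/* a set bit = an active RB */
}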
3553
3554static void
3555gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3556{
3557 switch (adev->asic_type) {
3558 case CHIP_FIJI:
71765469 3559 case CHIP_VEGAM:
3560 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3561 RB_XSEL2(1) | PKR_MAP(2) |
3562 PKR_XSEL(1) | PKR_YSEL(1) |
3563 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3564 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3565 SE_PAIR_YSEL(2);
3566 break;
3567 case CHIP_TONGA:
3568 case CHIP_POLARIS10:
3569 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3570 SE_XSEL(1) | SE_YSEL(1);
3571 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3572 SE_PAIR_YSEL(2);
3573 break;
3574 case CHIP_TOPAZ:
3575 case CHIP_CARRIZO:
3576 *rconf |= RB_MAP_PKR0(2);
3577 *rconf1 |= 0x0;
3578 break;
3579 case CHIP_POLARIS11:
c4642a47 3580 case CHIP_POLARIS12:
3581 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3582 SE_XSEL(1) | SE_YSEL(1);
3583 *rconf1 |= 0x0;
3584 break;
3585 case CHIP_STONEY:
3586 *rconf |= 0x0;
3587 *rconf1 |= 0x0;
3588 break;
3589 default:
3590 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3591 break;
3592 }
3593}
3594
3595static void
3596gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3597 u32 raster_config, u32 raster_config_1,
3598 unsigned rb_mask, unsigned num_rb)
3599{
3600 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3601 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3602 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3603 unsigned rb_per_se = num_rb / num_se;
3604 unsigned se_mask[4];
3605 unsigned se;
3606
3607 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3608 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3609 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3610 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3611
3612 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3613 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3614 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3615
3616 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3617 (!se_mask[2] && !se_mask[3]))) {
3618 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3619
3620 if (!se_mask[0] && !se_mask[1]) {
3621 raster_config_1 |=
3622 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3623 } else {
3624 raster_config_1 |=
3625 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3626 }
3627 }
3628
3629 for (se = 0; se < num_se; se++) {
3630 unsigned raster_config_se = raster_config;
3631 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3632 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3633 int idx = (se / 2) * 2;
3634
3635 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3636 raster_config_se &= ~SE_MAP_MASK;
3637
3638 if (!se_mask[idx]) {
3639 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3640 } else {
3641 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3642 }
3643 }
3644
3645 pkr0_mask &= rb_mask;
3646 pkr1_mask &= rb_mask;
3647 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3648 raster_config_se &= ~PKR_MAP_MASK;
3649
3650 if (!pkr0_mask) {
3651 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3652 } else {
3653 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3654 }
3655 }
3656
3657 if (rb_per_se >= 2) {
3658 unsigned rb0_mask = 1 << (se * rb_per_se);
3659 unsigned rb1_mask = rb0_mask << 1;
3660
3661 rb0_mask &= rb_mask;
3662 rb1_mask &= rb_mask;
3663 if (!rb0_mask || !rb1_mask) {
3664 raster_config_se &= ~RB_MAP_PKR0_MASK;
3665
3666 if (!rb0_mask) {
3667 raster_config_se |=
3668 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3669 } else {
3670 raster_config_se |=
3671 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3672 }
3673 }
3674
3675 if (rb_per_se > 2) {
3676 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3677 rb1_mask = rb0_mask << 1;
3678 rb0_mask &= rb_mask;
3679 rb1_mask &= rb_mask;
3680 if (!rb0_mask || !rb1_mask) {
3681 raster_config_se &= ~RB_MAP_PKR1_MASK;
3682
3683 if (!rb0_mask) {
3684 raster_config_se |=
3685 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3686 } else {
3687 raster_config_se |=
3688 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3689 }
3690 }
3691 }
3692 }
3693
3694 /* GRBM_GFX_INDEX has a different offset on VI */
3695 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3696 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3697 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3698 }
3699
3700 /* GRBM_GFX_INDEX has a different offset on VI */
3701 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3702}
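/*
 * Worked example with hypothetical numbers (not real fuse data): for
 * num_rb = 8 over num_se = 4 with RB4 harvested (rb_mask = 0xef),
 * rb_per_se = 2 and the shifted-mask cascade above yields
 * se_mask[] = { 0x03, 0x0c, 0x20, 0x80 }; SE2 keeps only one of its two
 * RBs, so the loop repoints its RB_MAP_PKR0 field at the survivor.
 */
static unsigned example_se_mask(unsigned se)
{
	unsigned rb_mask = 0xef, rb_per_se = 2;
	unsigned mask = ((1 << rb_per_se) - 1) & rb_mask;
	unsigned i;

	for (i = 0; i < se; i++)
		mask = (mask << rb_per_se) & rb_mask;

	return mask;	/* 0x03, 0x0c, 0x20, 0x80 for se = 0..3 */
}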
3703
8f8e00c1 3704static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3705{
3706 int i, j;
aac1e3ca 3707 u32 data;
167ac573 3708 u32 raster_config = 0, raster_config_1 = 0;
8f8e00c1 3709 u32 active_rbs = 0;
3710 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3711 adev->gfx.config.max_sh_per_se;
167ac573 3712 unsigned num_rb_pipes;
3713
3714 mutex_lock(&adev->grbm_idx_mutex);
3715 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3716 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
9559ef5b 3717 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
8f8e00c1
AD
3718 data = gfx_v8_0_get_rb_active_bitmap(adev);
3719 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
6157bd7a 3720 rb_bitmap_width_per_sh);
3721 }
3722 }
9559ef5b 3723 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
aaa36a97 3724
8f8e00c1 3725 adev->gfx.config.backend_enable_mask = active_rbs;
aac1e3ca 3726 adev->gfx.config.num_rbs = hweight32(active_rbs);
3727
3728 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3729 adev->gfx.config.max_shader_engines, 16);
3730
3731 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3732
3733 if (!adev->gfx.config.backend_enable_mask ||
3734 adev->gfx.config.num_rbs >= num_rb_pipes) {
3735 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3736 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3737 } else {
3738 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3739 adev->gfx.config.backend_enable_mask,
3740 num_rb_pipes);
3741 }
3742
3743 /* cache the values for userspace */
3744 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3745 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3746 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3747 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3748 RREG32(mmCC_RB_BACKEND_DISABLE);
3749 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3750 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3751 adev->gfx.config.rb_config[i][j].raster_config =
3752 RREG32(mmPA_SC_RASTER_CONFIG);
3753 adev->gfx.config.rb_config[i][j].raster_config_1 =
3754 RREG32(mmPA_SC_RASTER_CONFIG_1);
3755 }
3756 }
3757 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
167ac573 3758 mutex_unlock(&adev->grbm_idx_mutex);
3759}
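/*
 * Sketch of the packing done in gfx_v8_0_setup_rb() (illustrative only):
 * each SE/SH pair contributes rb_bitmap_width_per_sh bits laid side by
 * side, so e.g. four fully populated SEs with two RBs each produce
 * active_rbs = 0xff and num_rbs = hweight32(0xff) = 8.
 */
static u32 example_pack_active_rbs(const u32 *per_sh_bits, int num_se,
				   int sh_per_se, int width)
{
	u32 active = 0;
	int i, j;

	for (i = 0; i < num_se; i++)
		for (j = 0; j < sh_per_se; j++)
			active |= per_sh_bits[i * sh_per_se + j] <<
				  ((i * sh_per_se + j) * width);

	return active;
}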
3760
cd06bf68 3761/**
35c7a952 3762 * gfx_v8_0_init_compute_vmid - init compute vmids
cd06bf68 3763 *
dc102c43 3764 * @adev: amdgpu_device pointer
3765 *
3766 * Initialize compute vmid sh_mem registers
3767 *
3768 */
3769#define DEFAULT_SH_MEM_BASES (0x6000)
3770#define FIRST_COMPUTE_VMID (8)
3771#define LAST_COMPUTE_VMID (16)
35c7a952 3772static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3773{
3774 int i;
3775 uint32_t sh_mem_config;
3776 uint32_t sh_mem_bases;
3777
3778 /*
3779 * Configure apertures:
3780 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3781 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3782 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3783 */
3784 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3785
3786 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3787 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3788 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3789 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3790 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3791 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3792
3793 mutex_lock(&adev->srbm_mutex);
3794 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3795 vi_srbm_select(adev, 0, 0, 0, i);
3796 /* CP and shaders */
3797 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3798 WREG32(mmSH_MEM_APE1_BASE, 1);
3799 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3800 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3801 }
3802 vi_srbm_select(adev, 0, 0, 0, 0);
3803 mutex_unlock(&adev->srbm_mutex);
3804}
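/*
 * Sketch of the aperture arithmetic (the field layout is an assumption
 * based on the comment above): DEFAULT_SH_MEM_BASES packs the same
 * 16-bit base into both halves of SH_MEM_BASES, and each base selects
 * bits 63:48 of the aperture VA, so 0x6000 maps to 0x60000000'00000000.
 */
static u64 example_aperture_va(u32 base16)
{
	return (u64)(base16 & 0xffff) << 48;	/* 0x6000 -> LDS aperture */
}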
3805
3806static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3807{
3808 switch (adev->asic_type) {
3809 default:
3810 adev->gfx.config.double_offchip_lds_buf = 1;
3811 break;
3812 case CHIP_CARRIZO:
3813 case CHIP_STONEY:
3814 adev->gfx.config.double_offchip_lds_buf = 0;
3815 break;
3816 }
3817}
3818
3819static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3820{
8fe73328 3821 u32 tmp, sh_static_mem_cfg;
3822 int i;
3823
61cb8cef 3824 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3825 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3826 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3827 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3828
3829 gfx_v8_0_tiling_mode_table_init(adev);
8f8e00c1 3830 gfx_v8_0_setup_rb(adev);
7dae69a2 3831 gfx_v8_0_get_cu_info(adev);
df6e2c4a 3832 gfx_v8_0_config_init(adev);
3833
3834 /* XXX SH_MEM regs */
3835 /* where to put LDS, scratch, GPUVM in FSA64 space */
3836 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3837 SWIZZLE_ENABLE, 1);
3838 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3839 ELEMENT_SIZE, 1);
3840 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3841 INDEX_STRIDE, 3);
3842 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3843
aaa36a97 3844 mutex_lock(&adev->srbm_mutex);
7645670d 3845 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3846 vi_srbm_select(adev, 0, 0, 0, i);
3847 /* CP and shaders */
3848 if (i == 0) {
3849 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3850 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
0bde3a95 3851 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
74a5d165 3852 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
aaa36a97 3853 WREG32(mmSH_MEM_CONFIG, tmp);
8fe73328 3854 WREG32(mmSH_MEM_BASES, 0);
3855 } else {
3856 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
8fe73328 3857 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
0bde3a95 3858 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
74a5d165 3859 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
aaa36a97 3860 WREG32(mmSH_MEM_CONFIG, tmp);
770d13b1 3861 tmp = adev->gmc.shared_aperture_start >> 48;
8fe73328 3862 WREG32(mmSH_MEM_BASES, tmp);
3863 }
3864
3865 WREG32(mmSH_MEM_APE1_BASE, 1);
3866 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3867 }
3868 vi_srbm_select(adev, 0, 0, 0, 0);
3869 mutex_unlock(&adev->srbm_mutex);
3870
35c7a952 3871 gfx_v8_0_init_compute_vmid(adev);
cd06bf68 3872
3873 mutex_lock(&adev->grbm_idx_mutex);
3874 /*
3875 * make sure that the following register writes will be broadcast
3876 * to all the shaders
3877 */
9559ef5b 3878 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3879
3880 WREG32(mmPA_SC_FIFO_SIZE,
3881 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3882 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3883 (adev->gfx.config.sc_prim_fifo_size_backend <<
3884 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3885 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3886 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3887 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3888 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
d2383267 3889
3890 tmp = RREG32(mmSPI_ARB_PRIORITY);
3891 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3892 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3893 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3894 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3895 WREG32(mmSPI_ARB_PRIORITY, tmp);
3896
3897 mutex_unlock(&adev->grbm_idx_mutex);
3898
3899}
3900
3901static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3902{
3903 u32 i, j, k;
3904 u32 mask;
3905
3906 mutex_lock(&adev->grbm_idx_mutex);
3907 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3908 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
9559ef5b 3909 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3910 for (k = 0; k < adev->usec_timeout; k++) {
3911 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3912 break;
3913 udelay(1);
3914 }
1366b2d0 3915 if (k == adev->usec_timeout) {
3916 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3917 0xffffffff, 0xffffffff);
3918 mutex_unlock(&adev->grbm_idx_mutex);
3919 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3920 i, j);
3921 return;
3922 }
3923 }
3924 }
9559ef5b 3925 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3926 mutex_unlock(&adev->grbm_idx_mutex);
3927
3928 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3929 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3930 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3931 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3932 for (k = 0; k < adev->usec_timeout; k++) {
3933 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3934 break;
3935 udelay(1);
3936 }
3937}
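/*
 * The waits above are the stock poll-until-clear-or-timeout idiom; a
 * minimal generic sketch (illustrative, not driver code):
 */
static int example_poll_clear(struct amdgpu_device *adev, u32 reg, u32 mask)
{
	u32 k;

	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(reg) & mask) == 0)
			return 0;
		udelay(1);
	}

	return -ETIMEDOUT;
}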
3938
3939static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3940 bool enable)
3941{
3942 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3943
3944 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3945 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3946 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3947 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3948
3949 WREG32(mmCP_INT_CNTL_RING0, tmp);
3950}
3951
3952static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3953{
3954 /* csib */
3955 WREG32(mmRLC_CSIB_ADDR_HI,
3956 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3957 WREG32(mmRLC_CSIB_ADDR_LO,
3958 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3959 WREG32(mmRLC_CSIB_LENGTH,
3960 adev->gfx.rlc.clear_state_size);
3961}
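/*
 * Sketch of the address split used above (illustrative): the clear-state
 * buffer address is wider than 32 bits, so it is programmed as a high
 * word plus a low word with the two LSBs masked off, which implies the
 * buffer must be at least 4-byte aligned.
 */
static void example_split_csb_addr(u64 gpu_addr, u32 *hi, u32 *lo)
{
	*hi = upper_32_bits(gpu_addr);
	*lo = lower_32_bits(gpu_addr) & 0xfffffffc;
}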
3962
3963static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3964 int ind_offset,
3965 int list_size,
3966 int *unique_indices,
3967 int *indices_count,
3968 int max_indices,
3969 int *ind_start_offsets,
3970 int *offset_count,
3971 int max_offset)
3972{
3973 int indices;
3974 bool new_entry = true;
3975
3976 for (; ind_offset < list_size; ind_offset++) {
3977
3978 if (new_entry) {
3979 new_entry = false;
3980 ind_start_offsets[*offset_count] = ind_offset;
3981 *offset_count = *offset_count + 1;
3982 BUG_ON(*offset_count >= max_offset);
3983 }
3984
3985 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3986 new_entry = true;
3987 continue;
3988 }
3989
3990 ind_offset += 2;
3991
3992 /* look for a matching index */
3993 for (indices = 0;
3994 indices < *indices_count;
3995 indices++) {
3996 if (unique_indices[indices] ==
3997 register_list_format[ind_offset])
3998 break;
3999 }
4000
4001 if (indices >= *indices_count) {
4002 unique_indices[*indices_count] =
4003 register_list_format[ind_offset];
4004 indices = *indices_count;
4005 *indices_count = *indices_count + 1;
4006 BUG_ON(*indices_count >= max_indices);
4007 }
4008
4009 register_list_format[ind_offset] = indices;
4010 }
4011}
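/*
 * Worked example (the entry format is inferred from the loop above):
 * given
 *	fmt[] = { 0x10, 1, 0xa5,   0x14, 2, 0xa5,   0xffffffff }
 * the parser records one block starting at offset 0, collects the single
 * distinct index id 0xa5 into unique_indices[0], and rewrites both of
 * its occurrences in fmt[] with that slot number (0); 0xffffffff closes
 * the block, and a following word would start a new one.
 */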
4012
4013static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4014{
4015 int i, temp, data;
4016 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4017 int indices_count = 0;
4018 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4019 int offset_count = 0;
4020
4021 int list_size;
4022 unsigned int *register_list_format =
4023 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3f12325a 4024 if (!register_list_format)
4025 return -ENOMEM;
4026 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4027 adev->gfx.rlc.reg_list_format_size_bytes);
4028
4029 gfx_v8_0_parse_ind_reg_list(register_list_format,
4030 RLC_FormatDirectRegListLength,
4031 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4032 unique_indices,
4033 &indices_count,
c1b24a14 4034 ARRAY_SIZE(unique_indices),
4035 indirect_start_offsets,
4036 &offset_count,
c1b24a14 4037 ARRAY_SIZE(indirect_start_offsets));
4038
4039 /* save and restore list */
61cb8cef 4040 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4041
4042 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4043 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4044 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4045
4046 /* indirect list */
4047 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4048 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4049 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4050
4051 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4052 list_size = list_size >> 1;
4053 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4054 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4055
4056 /* write out the indirect-list starting offsets */
4057 WREG32(mmRLC_GPM_SCRATCH_ADDR,
4058 adev->gfx.rlc.starting_offsets_start);
c1b24a14 4059 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4060 WREG32(mmRLC_GPM_SCRATCH_DATA,
4061 indirect_start_offsets[i]);
4062
4063 /* unique indices */
4064 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4065 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
c1b24a14 4066 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
202e0b22 4067 if (unique_indices[i] != 0) {
4068 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4069 WREG32(data + i, unique_indices[i] >> 20);
202e0b22 4070 }
4071 }
4072 kfree(register_list_format);
4073
4074 return 0;
4075}
4076
4077static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4078{
61cb8cef 4079 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4080}
4081
fb16007b 4082static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4083{
4084 uint32_t data;
4085
4086 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4087
4088 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4089 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4090 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4091 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4092 WREG32(mmRLC_PG_DELAY, data);
4093
4094 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4095 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4096
4097}
4098
4099static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4100 bool enable)
4101{
61cb8cef 4102 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4103}
4104
4105static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4106 bool enable)
4107{
61cb8cef 4108 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4109}
4110
4111static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4112{
eb584241 4113 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4114}
4115
4116static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4117{
4118 if ((adev->asic_type == CHIP_CARRIZO) ||
4119 (adev->asic_type == CHIP_STONEY)) {
4120 gfx_v8_0_init_csb(adev);
4121 gfx_v8_0_init_save_restore_list(adev);
4122 gfx_v8_0_enable_save_restore_machine(adev);
c4d17b81
RZ
4123 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4124 gfx_v8_0_init_power_gating(adev);
4125 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
c4642a47 4126 } else if ((adev->asic_type == CHIP_POLARIS11) ||
71765469
LL
4127 (adev->asic_type == CHIP_POLARIS12) ||
4128 (adev->asic_type == CHIP_VEGAM)) {
c4d17b81
RZ
4129 gfx_v8_0_init_csb(adev);
4130 gfx_v8_0_init_save_restore_list(adev);
4131 gfx_v8_0_enable_save_restore_machine(adev);
4132 gfx_v8_0_init_power_gating(adev);
2b6cd977 4133 }
c4d17b81 4134
2b6cd977
EH
4135}
4136
761c2e82 4137static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
aaa36a97 4138{
61cb8cef 4139 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
aaa36a97
AD
4140
4141 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
aaa36a97
AD
4142 gfx_v8_0_wait_for_rlc_serdes(adev);
4143}
4144
4145static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4146{
61cb8cef 4147 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
aaa36a97 4148 udelay(50);
61cb8cef
TSD
4149
4150 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
aaa36a97
AD
4151 udelay(50);
4152}
4153
4154static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4155{
61cb8cef 4156 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
aaa36a97
AD
4157
4158 /* carrizo do enable cp interrupt after cp inited */
e3c7656c 4159 if (!(adev->flags & AMD_IS_APU))
aaa36a97
AD
4160 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4161
4162 udelay(50);
4163}
4164
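/**
 * gfx_v8_0_rlc_load_microcode - load the RLC microcode
 *
 * @adev: amdgpu_device pointer
 *
 * Writes the RLC firmware image into RLC_GPM_UCODE_DATA one dword at
 * a time, then programs the start address register with the firmware
 * version.
 */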
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

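/**
 * gfx_v8_0_rlc_resume - bring the RLC back up
 *
 * @adev: amdgpu_device pointer
 *
 * Stops the RLC, disables CGCG/CGLS clock gating and power gating,
 * resets the RLC, reinitializes power gating state, loads the RLC
 * microcode when direct firmware loading is in use, and restarts
 * the RLC.
 */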
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12 ||
	    adev->asic_type == CHIP_VEGAM) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy rlc firmware loading */
		r = gfx_v8_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}

static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}

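/**
 * gfx_v8_0_cp_gfx_load_microcode - load the PFP, CE and ME microcode
 *
 * @adev: amdgpu_device pointer
 *
 * Halts the gfx CP, then writes the prefetch parser (PFP), constant
 * engine (CE) and micro engine (ME) firmware images into their
 * respective ucode RAMs.
 */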
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}

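/**
 * gfx_v8_0_get_csb_size - size of the clear state commands, in dwords
 *
 * @adev: amdgpu_device pointer
 *
 * Walks vi_cs_data and adds up the packet headers and register
 * payloads that gfx_v8_0_cp_gfx_start() will emit, so the caller can
 * reserve enough ring space up front.
 */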
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

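/**
 * gfx_v8_0_cp_gfx_start - initialize the gfx CP and emit the clear state
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the basic CP configuration, enables the gfx CP, and emits
 * the clear state buffer, raster config and CE partition setup on the
 * first gfx ring.
 */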
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}

static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;

	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			    DOORBELL_RANGE_LOWER,
			    AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
	       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}

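/**
 * gfx_v8_0_cp_gfx_resume - set up and start the gfx ring buffer
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the ring buffer size, read/write pointer and writeback
 * addresses, base address and doorbell for gfx ring 0, then starts
 * the ring and runs a ring test.
 */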
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}

static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
	udelay(50);
}

static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}

/* KIQ functions */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}

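/**
 * gfx_v8_0_kiq_kcq_enable - map the compute queues through the KIQ
 *
 * @adev: amdgpu_device pointer
 *
 * Builds the queue mask from the MEC queue bitmap, then submits a
 * SET_RESOURCES packet and one MAP_QUEUES packet per compute ring on
 * the KIQ ring, using a scratch register write to confirm completion.
 */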
346586d5 4566static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4e638ae9 4567{
c3a49ab5 4568 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
f776952b 4569 uint32_t scratch, tmp = 0;
de65513a 4570 uint64_t queue_mask = 0;
f776952b
AD
4571 int r, i;
4572
de65513a
AR
4573 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4574 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4575 continue;
4576
4577 /* This situation may be hit in the future if a new HW
4578 * generation exposes more than 64 queues. If so, the
4579 * definition of queue_mask needs updating */
1d11ee89 4580 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
de65513a
AR
4581 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4582 break;
4583 }
4584
4585 queue_mask |= (1ull << i);
4586 }
4587
f776952b
AD
4588 r = amdgpu_gfx_scratch_get(adev, &scratch);
4589 if (r) {
4590 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4591 return r;
4592 }
4593 WREG32(scratch, 0xCAFEDEAD);
4e638ae9 4594
346586d5 4595 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
f776952b
AD
4596 if (r) {
4597 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4598 amdgpu_gfx_scratch_free(adev, scratch);
4599 return r;
4600 }
4e638ae9 4601 /* set resources */
346586d5
AD
4602 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4603 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
de65513a
AR
4604 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4605 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
346586d5
AD
4606 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4607 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4608 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4609 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
c3a49ab5
AD
4610 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4611 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4612 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4613 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4614
4615 /* map queues */
4616 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4617 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
3d7e30b3
AD
4618 amdgpu_ring_write(kiq_ring,
4619 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4620 amdgpu_ring_write(kiq_ring,
4621 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4622 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4623 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4624 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
c3a49ab5
AD
4625 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4626 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4627 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4628 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4629 }
f776952b
AD
4630 /* write to scratch for completion */
4631 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4632 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4633 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4e638ae9 4634 amdgpu_ring_commit(kiq_ring);
f776952b
AD
4635
4636 for (i = 0; i < adev->usec_timeout; i++) {
4637 tmp = RREG32(scratch);
4638 if (tmp == 0xDEADBEEF)
4639 break;
4640 DRM_UDELAY(1);
4641 }
4642 if (i >= adev->usec_timeout) {
c3a49ab5
AD
4643 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4644 scratch, tmp);
f776952b
AD
4645 r = -EINVAL;
4646 }
4647 amdgpu_gfx_scratch_free(adev, scratch);
4648
4649 return r;
4e638ae9
XY
4650}
4651
34130fb1
AR
4652static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4653{
4654 int i, r = 0;
4655
4656 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4657 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4658 for (i = 0; i < adev->usec_timeout; i++) {
4659 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4660 break;
4661 udelay(1);
4662 }
4663 if (i == adev->usec_timeout)
4664 r = -ETIMEDOUT;
4665 }
4666 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4667 WREG32(mmCP_HQD_PQ_RPTR, 0);
4668 WREG32(mmCP_HQD_PQ_WPTR, 0);
4669
4670 return r;
4e638ae9
XY
4671}
4672
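/**
 * gfx_v8_0_mqd_init - fill in a ring's memory queue descriptor
 *
 * @ring: amdgpu_ring pointer
 *
 * Populates the vi_mqd structure with the EOP buffer, MQD and HQD base
 * addresses, queue size, doorbell and MTYPE settings that
 * gfx_v8_0_mqd_commit() later writes out to the hardware.
 */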
a2140e00 4673static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4e638ae9 4674{
015c2360 4675 struct amdgpu_device *adev = ring->adev;
a2140e00 4676 struct vi_mqd *mqd = ring->mqd_ptr;
4e638ae9
XY
4677 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4678 uint32_t tmp;
4679
4680 mqd->header = 0xC0310800;
4681 mqd->compute_pipelinestat_enable = 0x00000001;
4682 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4683 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4684 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4685 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4686 mqd->compute_misc_reserved = 0x00000003;
925d5d79
AD
4687 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4688 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4689 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4690 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
34534610 4691 eop_base_addr = ring->eop_gpu_addr >> 8;
4e638ae9
XY
4692 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4693 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4694
4695 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4696 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4697 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
268cb4c7 4698 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4e638ae9
XY
4699
4700 mqd->cp_hqd_eop_control = tmp;
4701
4702 /* enable doorbell? */
bb215962
TSD
4703 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4704 CP_HQD_PQ_DOORBELL_CONTROL,
4705 DOORBELL_EN,
4706 ring->use_doorbell ? 1 : 0);
4e638ae9
XY
4707
4708 mqd->cp_hqd_pq_doorbell_control = tmp;
4709
4e638ae9 4710 /* set the pointer to the MQD */
015c2360
AD
4711 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4712 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4e638ae9
XY
4713
4714 /* set MQD vmid to 0 */
4715 tmp = RREG32(mmCP_MQD_CONTROL);
4716 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4717 mqd->cp_mqd_control = tmp;
4718
4719 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4720 hqd_gpu_addr = ring->gpu_addr >> 8;
4721 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4722 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4723
4724 /* set up the HQD, this is similar to CP_RB0_CNTL */
4725 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4726 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4727 (order_base_2(ring->ring_size / 4) - 1));
4728 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4729 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4730#ifdef __BIG_ENDIAN
4731 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4732#endif
4733 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4734 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4735 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4736 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4737 mqd->cp_hqd_pq_control = tmp;
4738
4739 /* set the wb address whether it's enabled or not */
4740 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4741 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4742 mqd->cp_hqd_pq_rptr_report_addr_hi =
4743 upper_32_bits(wb_gpu_addr) & 0xffff;
4744
4745 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4746 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4747 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4748 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4749
4750 tmp = 0;
4751 /* enable the doorbell if requested */
4752 if (ring->use_doorbell) {
4753 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4754 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4755 DOORBELL_OFFSET, ring->doorbell_index);
4756
4757 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4758 DOORBELL_EN, 1);
4759 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4760 DOORBELL_SOURCE, 0);
4761 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4762 DOORBELL_HIT, 0);
4763 }
4764
4765 mqd->cp_hqd_pq_doorbell_control = tmp;
4766
4767 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4768 ring->wptr = 0;
4769 mqd->cp_hqd_pq_wptr = ring->wptr;
4770 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4771
4772 /* set the vmid for the queue */
4773 mqd->cp_hqd_vmid = 0;
4774
4775 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4776 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4777 mqd->cp_hqd_persistent_state = tmp;
4778
ed6f55d1
AD
4779 /* set MTYPE */
4780 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4781 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4782 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4783 mqd->cp_hqd_ib_control = tmp;
4784
4785 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4786 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4787 mqd->cp_hqd_iq_timer = tmp;
4788
4789 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4790 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4791 mqd->cp_hqd_ctx_save_control = tmp;
4792
97bf47b2
AR
4793 /* defaults */
4794 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4795 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4796 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4797 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4798 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4799 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4800 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4801 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4802 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4803 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4804 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4805 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4806 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4807 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4808 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4809
4e638ae9
XY
4810 /* activate the queue */
4811 mqd->cp_hqd_active = 1;
4812
4813 return 0;
4814}
4815
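/**
 * gfx_v8_0_mqd_commit - write a prepared MQD out to the HQD registers
 *
 * @adev: amdgpu_device pointer
 * @mqd: memory queue descriptor to commit
 *
 * Copies the MQD image into the contiguous HQD register block,
 * skipping the EOP read/write pointers on Tonga (see the errata
 * comment in the function), and finally sets CP_HQD_ACTIVE.
 */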
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}

a2140e00 4853static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4e638ae9
XY
4854{
4855 struct amdgpu_device *adev = ring->adev;
a2140e00 4856 struct vi_mqd *mqd = ring->mqd_ptr;
1fb37a3d 4857 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4e638ae9 4858
39300115 4859 gfx_v8_0_kiq_setting(ring);
4e638ae9 4860
13a752e3 4861 if (adev->in_gpu_reset) { /* for GPU_RESET case */
1fb37a3d
ML
4862 /* reset MQD to a clean status */
4863 if (adev->gfx.mec.mqd_backup[mqd_idx])
6b0fa871 4864 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4e638ae9 4865
1fb37a3d
ML
4866 /* reset ring buffer */
4867 ring->wptr = 0;
4868 amdgpu_ring_clear_ring(ring);
39300115
AD
4869 mutex_lock(&adev->srbm_mutex);
4870 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
97bf47b2 4871 gfx_v8_0_mqd_commit(adev, mqd);
39300115
AD
4872 vi_srbm_select(adev, 0, 0, 0, 0);
4873 mutex_unlock(&adev->srbm_mutex);
a545e491 4874 } else {
6b0fa871 4875 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
2d6fb105
AD
4876 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4877 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
a545e491
AD
4878 mutex_lock(&adev->srbm_mutex);
4879 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4880 gfx_v8_0_mqd_init(ring);
97bf47b2 4881 gfx_v8_0_mqd_commit(adev, mqd);
a545e491
AD
4882 vi_srbm_select(adev, 0, 0, 0, 0);
4883 mutex_unlock(&adev->srbm_mutex);
4e638ae9 4884
a545e491 4885 if (adev->gfx.mec.mqd_backup[mqd_idx])
6b0fa871 4886 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
1fb37a3d 4887 }
4e638ae9 4888
dcf75843 4889 return 0;
4e638ae9
XY
4890}
4891
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}

static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}

596c67d0 4933static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4e638ae9
XY
4934{
4935 struct amdgpu_ring *ring = NULL;
596c67d0 4936 int r = 0, i;
4e638ae9 4937
596c67d0 4938 gfx_v8_0_cp_compute_enable(adev, true);
4e638ae9
XY
4939
4940 ring = &adev->gfx.kiq.ring;
6a6f380f
AD
4941
4942 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4943 if (unlikely(r != 0))
4944 goto done;
4945
4946 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4947 if (!r) {
a2140e00 4948 r = gfx_v8_0_kiq_init_queue(ring);
596c67d0 4949 amdgpu_bo_kunmap(ring->mqd_obj);
1fb37a3d 4950 ring->mqd_ptr = NULL;
4e638ae9 4951 }
6a6f380f
AD
4952 amdgpu_bo_unreserve(ring->mqd_obj);
4953 if (r)
4954 goto done;
4e638ae9 4955
4e638ae9
XY
4956 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4957 ring = &adev->gfx.compute_ring[i];
6a6f380f
AD
4958
4959 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4960 if (unlikely(r != 0))
4961 goto done;
4962 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4963 if (!r) {
39300115 4964 r = gfx_v8_0_kcq_init_queue(ring);
596c67d0 4965 amdgpu_bo_kunmap(ring->mqd_obj);
1fb37a3d 4966 ring->mqd_ptr = NULL;
596c67d0 4967 }
6a6f380f
AD
4968 amdgpu_bo_unreserve(ring->mqd_obj);
4969 if (r)
4970 goto done;
4e638ae9
XY
4971 }
4972
4f339b29 4973 gfx_v8_0_set_mec_doorbell_range(adev);
4e638ae9 4974
346586d5 4975 r = gfx_v8_0_kiq_kcq_enable(adev);
c3a49ab5
AD
4976 if (r)
4977 goto done;
aaa36a97 4978
346586d5
AD
4979 /* Test KIQ */
4980 ring = &adev->gfx.kiq.ring;
4981 ring->ready = true;
4982 r = amdgpu_ring_test_ring(ring);
4983 if (r) {
4984 ring->ready = false;
4985 goto done;
aaa36a97
AD
4986 }
4987
346586d5 4988 /* Test KCQs */
aaa36a97 4989 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
c3a49ab5 4990 ring = &adev->gfx.compute_ring[i];
aaa36a97
AD
4991 ring->ready = true;
4992 r = amdgpu_ring_test_ring(ring);
4993 if (r)
4994 ring->ready = false;
4995 }
4996
6a6f380f
AD
4997done:
4998 return r;
aaa36a97
AD
4999}
5000
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy firmware loading */
		r = gfx_v8_0_cp_gfx_load_microcode(adev);
		if (r)
			return r;

		r = gfx_v8_0_cp_compute_load_microcode(adev);
		if (r)
			return r;
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}

static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}

5fc3aeeb 5038static int gfx_v8_0_hw_init(void *handle)
aaa36a97
AD
5039{
5040 int r;
5fc3aeeb 5041 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
5042
5043 gfx_v8_0_init_golden_registers(adev);
aaa36a97
AD
5044 gfx_v8_0_gpu_init(adev);
5045
5046 r = gfx_v8_0_rlc_resume(adev);
5047 if (r)
5048 return r;
5049
5050 r = gfx_v8_0_cp_resume(adev);
aaa36a97
AD
5051
5052 return r;
5053}
5054
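/**
 * gfx_v8_0_kcq_disable - unmap a compute queue through the KIQ
 *
 * @kiq_ring: the KIQ ring used to submit the request
 * @ring: the compute ring to unmap
 *
 * Submits an UNMAP_QUEUES packet for the given ring's doorbell and
 * polls a scratch register to confirm the KIQ processed it.
 */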
static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t scratch, tmp = 0;
	int r, i;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	r = amdgpu_ring_alloc(kiq_ring, 10);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}

	/* unmap queues */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

5fc3aeeb 5106static int gfx_v8_0_hw_fini(void *handle)
aaa36a97 5107{
5fc3aeeb 5108 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
85f95ad6 5109 int i;
5fc3aeeb 5110
1d22a454
AD
5111 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5112 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
85f95ad6
ML
5113
5114 /* disable KCQ to avoid CPC touch memory not valid anymore */
5115 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5116 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
5117
84f3f05b
XY
5118 if (amdgpu_sriov_vf(adev)) {
5119 pr_debug("For SRIOV client, shouldn't do anything.\n");
5120 return 0;
5121 }
aaa36a97
AD
5122 gfx_v8_0_cp_enable(adev, false);
5123 gfx_v8_0_rlc_stop(adev);
aaa36a97 5124
2990a1fc
AD
5125 amdgpu_device_ip_set_powergating_state(adev,
5126 AMD_IP_BLOCK_TYPE_GFX,
5127 AMD_PG_STATE_UNGATE);
62a86fc2 5128
aaa36a97
AD
5129 return 0;
5130}
5131
5fc3aeeb 5132static int gfx_v8_0_suspend(void *handle)
aaa36a97 5133{
5fc3aeeb 5134 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
b4e40676 5135 adev->gfx.in_suspend = true;
aaa36a97
AD
5136 return gfx_v8_0_hw_fini(adev);
5137}
5138
5fc3aeeb 5139static int gfx_v8_0_resume(void *handle)
aaa36a97 5140{
b4e40676 5141 int r;
5fc3aeeb 5142 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5143
b4e40676
DP
5144 r = gfx_v8_0_hw_init(adev);
5145 adev->gfx.in_suspend = false;
5146 return r;
aaa36a97
AD
5147}
5148
5fc3aeeb 5149static bool gfx_v8_0_is_idle(void *handle)
aaa36a97 5150{
5fc3aeeb 5151 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5152
aaa36a97
AD
5153 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5154 return false;
5155 else
5156 return true;
5157}
5158
5fc3aeeb 5159static int gfx_v8_0_wait_for_idle(void *handle)
aaa36a97
AD
5160{
5161 unsigned i;
5fc3aeeb 5162 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
5163
5164 for (i = 0; i < adev->usec_timeout; i++) {
5003f278 5165 if (gfx_v8_0_is_idle(handle))
aaa36a97 5166 return 0;
5003f278 5167
aaa36a97
AD
5168 udelay(1);
5169 }
5170 return -ETIMEDOUT;
5171}
5172
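/**
 * gfx_v8_0_check_soft_reset - determine which gfx blocks need a reset
 *
 * @handle: amdgpu_device pointer
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS, records the
 * required GRBM/SRBM soft reset bits in adev->gfx, and returns true
 * if any block is busy and a soft reset is needed.
 */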
da146d3b 5173static bool gfx_v8_0_check_soft_reset(void *handle)
aaa36a97 5174{
3d7c6384 5175 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
5176 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5177 u32 tmp;
5178
5179 /* GRBM_STATUS */
5180 tmp = RREG32(mmGRBM_STATUS);
5181 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5182 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5183 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5184 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5185 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3d7c6384
CZ
5186 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5187 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
aaa36a97
AD
5188 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5189 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5190 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5191 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
aaa36a97
AD
5192 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5193 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5194 }
5195
5196 /* GRBM_STATUS2 */
5197 tmp = RREG32(mmGRBM_STATUS2);
5198 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5199 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5200 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5201
3d7c6384
CZ
5202 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5203 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5204 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5205 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5206 SOFT_RESET_CPF, 1);
5207 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5208 SOFT_RESET_CPC, 1);
5209 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5210 SOFT_RESET_CPG, 1);
5211 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5212 SOFT_RESET_GRBM, 1);
5213 }
5214
aaa36a97
AD
5215 /* SRBM_STATUS */
5216 tmp = RREG32(mmSRBM_STATUS);
5217 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5218 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5219 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
3d7c6384
CZ
5220 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5221 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5222 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
aaa36a97
AD
5223
5224 if (grbm_soft_reset || srbm_soft_reset) {
3d7c6384
CZ
5225 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5226 adev->gfx.srbm_soft_reset = srbm_soft_reset;
da146d3b 5227 return true;
3d7c6384 5228 } else {
3d7c6384
CZ
5229 adev->gfx.grbm_soft_reset = 0;
5230 adev->gfx.srbm_soft_reset = 0;
da146d3b 5231 return false;
3d7c6384 5232 }
3d7c6384 5233}
aaa36a97 5234
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}

static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}

static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}

/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}

static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}

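/*
 * Wave debug helpers: wave_read_ind() and wave_read_regs() go through
 * the SQ_IND_INDEX/SQ_IND_DATA indirect register pair to read wave
 * state and SGPRs for a given SIMD/wave; they back the read_wave_*
 * callbacks in the amdgpu_gfx_funcs table below.
 */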
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32(mmSQ_IND_DATA);
}

static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};

static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}

static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_device_ip_set_powergating_state(adev,
					       AMD_IP_BLOCK_TYPE_GFX,
					       AMD_PG_STATE_GATE);

	return 0;
}

static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12) ||
	    (adev->asic_type == CHIP_VEGAM))
		/* Send msg to SMU via Powerplay */
		amdgpu_device_ip_set_powergating_state(adev,
						       AMD_IP_BLOCK_TYPE_SMC,
						       enable ?
						       AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							 bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}

static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}

5fc3aeeb 5608static int gfx_v8_0_set_powergating_state(void *handle,
5609 enum amd_powergating_state state)
aaa36a97 5610{
62a86fc2 5611 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7e913664 5612 bool enable = (state == AMD_PG_STATE_GATE);
62a86fc2 5613
ce137c04
ML
5614 if (amdgpu_sriov_vf(adev))
5615 return 0;
5616
62a86fc2 5617 switch (adev->asic_type) {
2c547165
AD
5618 case CHIP_CARRIZO:
5619 case CHIP_STONEY:
ad1830d5 5620
5c964221
RZ
5621 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5622 cz_enable_sck_slow_down_on_power_up(adev, true);
5623 cz_enable_sck_slow_down_on_power_down(adev, true);
5624 } else {
5625 cz_enable_sck_slow_down_on_power_up(adev, false);
5626 cz_enable_sck_slow_down_on_power_down(adev, false);
5627 }
5628 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5629 cz_enable_cp_power_gating(adev, true);
5630 else
5631 cz_enable_cp_power_gating(adev, false);
5632
ad1830d5 5633 cz_update_gfx_cg_power_gating(adev, enable);
2c547165
AD
5634
5635 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5636 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5637 else
5638 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5639
5640 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5641 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5642 else
5643 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5644 break;
2cc0c0b5 5645 case CHIP_POLARIS11:
c4642a47 5646 case CHIP_POLARIS12:
71765469 5647 case CHIP_VEGAM:
7ba0eb6d
AD
5648 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5649 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5650 else
5651 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5652
5653 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5654 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5655 else
5656 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5657
5658 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5659 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
62a86fc2 5660 else
7ba0eb6d 5661 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
62a86fc2
EH
5662 break;
5663 default:
5664 break;
5665 }
5666
aaa36a97
AD
5667 return 0;
5668}
5669
ebd843d6
HR
5670static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5671{
5672 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5673 int data;
5674
ce137c04
ML
5675 if (amdgpu_sriov_vf(adev))
5676 *flags = 0;
5677
ebd843d6
HR
5678 /* AMD_CG_SUPPORT_GFX_MGCG */
5679 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5680 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5681 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5682
5683 /* AMD_CG_SUPPORT_GFX_CGLG */
5684 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5685 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5686 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5687
5688 /* AMD_CG_SUPPORT_GFX_CGLS */
5689 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5690 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5691
5692 /* AMD_CG_SUPPORT_GFX_CGTS */
5693 data = RREG32(mmCGTS_SM_CTRL_REG);
5694 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5695 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5696
5697 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5698 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5699 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5700
5701 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5702 data = RREG32(mmRLC_MEM_SLP_CNTL);
5703 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5704 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5705
5706 /* AMD_CG_SUPPORT_GFX_CP_LS */
5707 data = RREG32(mmCP_MEM_SLP_CNTL);
5708 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5709 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5710}
5711
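/*
 * Hedged usage sketch (an assumption, mirroring how a debugfs-style
 * query would consume this handler): pass a zeroed flags word, then
 * test the AMD_CG_SUPPORT_* bits it ORs in.
 */
#if 0
	u32 cg_flags = 0;

	gfx_v8_0_get_clockgating_state((void *)adev, &cg_flags);
	if (cg_flags & AMD_CG_SUPPORT_GFX_MGCG)
		DRM_INFO("GFX MGCG currently active\n");
#endif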
79deaaf4 5712static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
14698b6c 5713 uint32_t reg_addr, uint32_t cmd)
6e378858
EH
5714{
5715 uint32_t data;
5716
9559ef5b 5717 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6e378858
EH
5718
5719 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5720 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5721
5722 data = RREG32(mmRLC_SERDES_WR_CTRL);
146f256f 5723 if (adev->asic_type == CHIP_STONEY)
62d2ce4b
TSD
5724 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5725 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5726 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5727 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5728 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5729 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5730 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5731 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5732 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
146f256f
AD
5733 else
5734 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5735 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5736 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5737 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5738 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5739 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5740 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5741 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5742 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5743 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5744 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
6e378858 5745 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
146f256f
AD
5746 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5747 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5748 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
6e378858
EH
5749
5750 WREG32(mmRLC_SERDES_WR_CTRL, data);
5751}
5752
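/*
 * Example call sites, as used by the clock-gating paths later in this
 * file: the same helper both clears and sets a BPM override, selected
 * purely by the cmd argument.
 */
#if 0
	/* clear the MGCG override on all CUs */
	gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
	/* and set it again when ungating */
	gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
#endif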
dbff57bc
AD
5753#define MSG_ENTER_RLC_SAFE_MODE 1
5754#define MSG_EXIT_RLC_SAFE_MODE 0
61cb8cef
TSD
5755#define RLC_GPR_REG2__REQ_MASK 0x00000001
5756#define RLC_GPR_REG2__REQ__SHIFT 0
5757#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5758#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
dbff57bc 5759
dbff57bc
AD
5760static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5761{
5762 u32 data;
5763 unsigned i;
5764
5765 data = RREG32(mmRLC_CNTL);
5766 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5767 return;
5768
5769 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5770 data |= RLC_SAFE_MODE__CMD_MASK;
5771 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5772 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5773 WREG32(mmRLC_SAFE_MODE, data);
5774
5775 for (i = 0; i < adev->usec_timeout; i++) {
5776 if ((RREG32(mmRLC_GPM_STAT) &
5777 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5778 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5779 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5780 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5781 break;
5782 udelay(1);
5783 }
5784
5785 for (i = 0; i < adev->usec_timeout; i++) {
61cb8cef 5786 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
dbff57bc
AD
5787 break;
5788 udelay(1);
5789 }
5790 adev->gfx.rlc.in_safe_mode = true;
5791 }
5792}
5793
5794static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5795{
5796 u32 data = 0;
5797 unsigned i;
5798
5799 data = RREG32(mmRLC_CNTL);
5800 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5801 return;
5802
5803 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5804 if (adev->gfx.rlc.in_safe_mode) {
5805 data |= RLC_SAFE_MODE__CMD_MASK;
5806 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5807 WREG32(mmRLC_SAFE_MODE, data);
5808 adev->gfx.rlc.in_safe_mode = false;
5809 }
5810 }
5811
5812 for (i = 0; i < adev->usec_timeout; i++) {
61cb8cef 5813 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
dbff57bc
AD
5814 break;
5815 udelay(1);
5816 }
5817}
5818
dbff57bc
AD
5819static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5820 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5821 .exit_safe_mode = iceland_exit_rlc_safe_mode
5822};
5823
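/*
 * Sketch of the discipline the rlc_funcs above impose: every CG/PG
 * register sequence in this file is bracketed by enter/exit safe mode
 * so the RLC cannot power the block down mid-update. The helper name
 * below is hypothetical, for illustration only.
 */
#if 0
static void gfx_v8_0_example_guarded_update(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs->enter_safe_mode(adev);
	WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
#endif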
dbff57bc
AD
5824static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5825 bool enable)
6e378858
EH
5826{
5827 uint32_t temp, data;
5828
dbff57bc
AD
5829 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5830
6e378858 5831 /* It is disabled by HW by default */
14698b6c
AD
5832 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5833 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
61cb8cef 5834 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
14698b6c 5835 /* 1 - RLC memory Light sleep */
61cb8cef 5836 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
6e378858 5837
61cb8cef
TSD
5838 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5839 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
14698b6c 5840 }
6e378858
EH
5841
5842 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5843 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
dbff57bc
AD
5844 if (adev->flags & AMD_IS_APU)
5845 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5846 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5847 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5848 else
5849 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5850 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5851 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5852 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
6e378858
EH
5853
5854 if (temp != data)
5855 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5856
5857 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5858 gfx_v8_0_wait_for_rlc_serdes(adev);
5859
5860 /* 5 - clear mgcg override */
79deaaf4 5861 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6e378858 5862
14698b6c
AD
5863 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5864 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5865 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5866 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5867 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5868 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5869 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5870 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5871 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5872 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5873 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5874 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5875 if (temp != data)
5876 WREG32(mmCGTS_SM_CTRL_REG, data);
5877 }
6e378858
EH
5878 udelay(50);
5879
5880 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5881 gfx_v8_0_wait_for_rlc_serdes(adev);
5882 } else {
5883 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5884 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5885 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5886 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5887 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5888 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5889 if (temp != data)
5890 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5891
5892 /* 2 - disable MGLS in RLC */
5893 data = RREG32(mmRLC_MEM_SLP_CNTL);
5894 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5895 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5896 WREG32(mmRLC_MEM_SLP_CNTL, data);
5897 }
5898
5899 /* 3 - disable MGLS in CP */
5900 data = RREG32(mmCP_MEM_SLP_CNTL);
5901 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5902 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5903 WREG32(mmCP_MEM_SLP_CNTL, data);
5904 }
5905
5906 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5907 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5908 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5909 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5910 if (temp != data)
5911 WREG32(mmCGTS_SM_CTRL_REG, data);
5912
5913 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5914 gfx_v8_0_wait_for_rlc_serdes(adev);
5915
5916 /* 6 - set mgcg override */
79deaaf4 5917 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6e378858
EH
5918
5919 udelay(50);
5920
5921 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5922 gfx_v8_0_wait_for_rlc_serdes(adev);
5923 }
dbff57bc
AD
5924
5925 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6e378858
EH
5926}
5927
dbff57bc
AD
5928static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5929 bool enable)
6e378858
EH
5930{
5931 uint32_t temp, temp1, data, data1;
5932
5933 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5934
dbff57bc
AD
5935 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5936
14698b6c 5937 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
6e378858
EH
5938 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5939 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5940 if (temp1 != data1)
5941 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5942
dd31ae9a 5943 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6e378858
EH
5944 gfx_v8_0_wait_for_rlc_serdes(adev);
5945
dd31ae9a 5946 /* 2 - clear cgcg override */
79deaaf4 5947 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6e378858
EH
5948
5949 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5950 gfx_v8_0_wait_for_rlc_serdes(adev);
5951
dd31ae9a 5952 /* 3 - write cmd to set CGLS */
79deaaf4 5953 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
6e378858 5954
dd31ae9a 5955 /* 4 - enable cgcg */
6e378858
EH
5956 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5957
14698b6c
AD
5958 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5959 /* enable cgls*/
5960 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6e378858 5961
14698b6c
AD
5962 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5963 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
6e378858 5964
14698b6c
AD
5965 if (temp1 != data1)
5966 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5967 } else {
5968 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5969 }
6e378858
EH
5970
5971 if (temp != data)
5972 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
dd31ae9a
AN
5973
5974 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5975 * Cmp_busy/GFX_Idle interrupts
5976 */
5977 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6e378858
EH
5978 } else {
5979 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5980 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5981
5982 /* TEST CGCG */
5983 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5984 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5985 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5986 if (temp1 != data1)
5987 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5988
5989 /* read gfx register to wake up cgcg */
5990 RREG32(mmCB_CGTT_SCLK_CTRL);
5991 RREG32(mmCB_CGTT_SCLK_CTRL);
5992 RREG32(mmCB_CGTT_SCLK_CTRL);
5993 RREG32(mmCB_CGTT_SCLK_CTRL);
5994
5995 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5996 gfx_v8_0_wait_for_rlc_serdes(adev);
5997
5998 /* write cmd to Set CGCG Override */
79deaaf4 5999 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6e378858
EH
6000
6001 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6002 gfx_v8_0_wait_for_rlc_serdes(adev);
6003
6004 /* write cmd to Clear CGLS */
79deaaf4 6005 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6e378858
EH
6006
6007 /* disable cgcg, cgls should be disabled too. */
6008 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
14698b6c 6009 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6e378858
EH
6010 if (temp != data)
6011 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
d5dc36a4
AD
6012 /* enable interrupts again for PG */
6013 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6e378858 6014 }
dbff57bc 6015
7894745a
TSD
6016 gfx_v8_0_wait_for_rlc_serdes(adev);
6017
dbff57bc 6018 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6e378858 6019}
dbff57bc
AD
6020static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6021 bool enable)
6e378858
EH
6022{
6023 if (enable) {
6024 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6025 * === MGCG + MGLS + TS(CG/LS) ===
6026 */
dbff57bc
AD
6027 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6028 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6e378858
EH
6029 } else {
6030 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6031 * === CGCG + CGLS ===
6032 */
dbff57bc
AD
6033 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6034 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6e378858
EH
6035 }
6036 return 0;
6037}
6038
a8ca3413
RZ
6039static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6040 enum amd_clockgating_state state)
6041{
8a19e7fa
RZ
6042 uint32_t msg_id, pp_state = 0;
6043 uint32_t pp_support_state = 0;
a8ca3413 6044
8a19e7fa
RZ
6045 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6046 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6047 pp_support_state = PP_STATE_SUPPORT_LS;
6048 pp_state = PP_STATE_LS;
6049 }
6050 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6051 pp_support_state |= PP_STATE_SUPPORT_CG;
6052 pp_state |= PP_STATE_CG;
6053 }
6054 if (state == AMD_CG_STATE_UNGATE)
6055 pp_state = 0;
6056
6057 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6058 PP_BLOCK_GFX_CG,
6059 pp_support_state,
6060 pp_state);
3811f8f0
RZ
6061 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6062 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
8a19e7fa 6063 }
a8ca3413 6064
8a19e7fa
RZ
6065 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6066 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6067 pp_support_state = PP_STATE_SUPPORT_LS;
6068 pp_state = PP_STATE_LS;
6069 }
a8ca3413 6070
8a19e7fa
RZ
6071 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6072 pp_support_state |= PP_STATE_SUPPORT_CG;
6073 pp_state |= PP_STATE_CG;
6074 }
6075
6076 if (state == AMD_CG_STATE_UNGATE)
6077 pp_state = 0;
6078
6079 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6080 PP_BLOCK_GFX_MG,
6081 pp_support_state,
6082 pp_state);
3811f8f0
RZ
6083 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6084 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
8a19e7fa 6085 }
a8ca3413
RZ
6086
6087 return 0;
6088}
6089
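/*
 * PP_CG_MSG_ID() packs (group, block, support, state) into one SMU
 * message word, so a block that supports both CG and LS can be updated
 * with a single request by OR-ing the arguments, e.g. (values exactly
 * as accumulated above):
 */
#if 0
	uint32_t msg_id;

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, PP_BLOCK_GFX_CG,
			      PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			      PP_STATE_CG | PP_STATE_LS);
	if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
#endif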
6090static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6091 enum amd_clockgating_state state)
6092{
8a19e7fa
RZ
6093
6094 uint32_t msg_id, pp_state = 0;
6095 uint32_t pp_support_state = 0;
a8ca3413 6096
8a19e7fa
RZ
6097 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6098 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6099 pp_support_state = PP_STATE_SUPPORT_LS;
6100 pp_state = PP_STATE_LS;
6101 }
6102 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6103 pp_support_state |= PP_STATE_SUPPORT_CG;
6104 pp_state |= PP_STATE_CG;
6105 }
6106 if (state == AMD_CG_STATE_UNGATE)
6107 pp_state = 0;
6108
6109 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6110 PP_BLOCK_GFX_CG,
6111 pp_support_state,
6112 pp_state);
3811f8f0
RZ
6113 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6114 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
8a19e7fa 6115 }
a8ca3413 6116
8a19e7fa
RZ
6117 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6118 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6119 pp_support_state = PP_STATE_SUPPORT_LS;
6120 pp_state = PP_STATE_LS;
6121 }
6122 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6123 pp_support_state |= PP_STATE_SUPPORT_CG;
6124 pp_state |= PP_STATE_CG;
6125 }
6126 if (state == AMD_CG_STATE_UNGATE)
6127 pp_state = 0;
6128
6129 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6130 PP_BLOCK_GFX_3D,
6131 pp_support_state,
6132 pp_state);
3811f8f0
RZ
6133 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6134 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
8a19e7fa 6135 }
a8ca3413 6136
8a19e7fa
RZ
6137 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6138 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6139 pp_support_state = PP_STATE_SUPPORT_LS;
6140 pp_state = PP_STATE_LS;
6141 }
a8ca3413 6142
8a19e7fa
RZ
6143 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6144 pp_support_state |= PP_STATE_SUPPORT_CG;
6145 pp_state |= PP_STATE_CG;
6146 }
a8ca3413 6147
8a19e7fa
RZ
6148 if (state == AMD_CG_STATE_UNGATE)
6149 pp_state = 0;
a8ca3413 6150
8a19e7fa
RZ
6151 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6152 PP_BLOCK_GFX_MG,
6153 pp_support_state,
6154 pp_state);
3811f8f0
RZ
6155 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6156 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
8a19e7fa
RZ
6157 }
6158
6159 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6160 pp_support_state = PP_STATE_SUPPORT_LS;
6161
6162 if (state == AMD_CG_STATE_UNGATE)
6163 pp_state = 0;
6164 else
6165 pp_state = PP_STATE_LS;
6166
6167 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6168 PP_BLOCK_GFX_RLC,
6169 pp_support_state,
6170 pp_state);
3811f8f0
RZ
6171 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6172 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
8a19e7fa
RZ
6173 }
6174
6175 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6176 pp_support_state = PP_STATE_SUPPORT_LS;
6177
6178 if (state == AMD_CG_STATE_UNGATE)
6179 pp_state = 0;
6180 else
6181 pp_state = PP_STATE_LS;
6182 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
a8ca3413 6183 PP_BLOCK_GFX_CP,
8a19e7fa 6184 pp_support_state,
a8ca3413 6185 pp_state);
3811f8f0
RZ
6186 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6187 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
8a19e7fa 6188 }
a8ca3413
RZ
6189
6190 return 0;
6191}
6192
5fc3aeeb 6193static int gfx_v8_0_set_clockgating_state(void *handle,
6194 enum amd_clockgating_state state)
aaa36a97 6195{
6e378858
EH
6196 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6197
ce137c04
ML
6198 if (amdgpu_sriov_vf(adev))
6199 return 0;
6200
6e378858
EH
6201 switch (adev->asic_type) {
6202 case CHIP_FIJI:
dbff57bc
AD
6203 case CHIP_CARRIZO:
6204 case CHIP_STONEY:
6205 gfx_v8_0_update_gfx_clock_gating(adev,
7e913664 6206 state == AMD_CG_STATE_GATE);
6e378858 6207 break;
a8ca3413
RZ
6208 case CHIP_TONGA:
6209 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6210 break;
6211 case CHIP_POLARIS10:
6212 case CHIP_POLARIS11:
739e9fff 6213 case CHIP_POLARIS12:
71765469 6214 case CHIP_VEGAM:
a8ca3413
RZ
6215 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6216 break;
6e378858
EH
6217 default:
6218 break;
6219 }
aaa36a97
AD
6220 return 0;
6221}
6222
536fbf94 6223static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
aaa36a97 6224{
5003f278 6225 return ring->adev->wb.wb[ring->rptr_offs];
aaa36a97
AD
6226}
6227
536fbf94 6228static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
aaa36a97
AD
6229{
6230 struct amdgpu_device *adev = ring->adev;
aaa36a97
AD
6231
6232 if (ring->use_doorbell)
6233 /* XXX check if swapping is necessary on BE */
5003f278 6234 return ring->adev->wb.wb[ring->wptr_offs];
aaa36a97 6235 else
5003f278 6236 return RREG32(mmCP_RB0_WPTR);
aaa36a97
AD
6237}
6238
6239static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6240{
6241 struct amdgpu_device *adev = ring->adev;
6242
6243 if (ring->use_doorbell) {
6244 /* XXX check if swapping is necessary on BE */
536fbf94
KW
6245 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6246 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
aaa36a97 6247 } else {
536fbf94 6248 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
aaa36a97
AD
6249 (void)RREG32(mmCP_RB0_WPTR);
6250 }
6251}
6252
d2edb07b 6253static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
aaa36a97
AD
6254{
6255 u32 ref_and_mask, reg_mem_engine;
6256
4e638ae9
XY
6257 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6258 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
aaa36a97
AD
6259 switch (ring->me) {
6260 case 1:
6261 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6262 break;
6263 case 2:
6264 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6265 break;
6266 default:
6267 return;
6268 }
6269 reg_mem_engine = 0;
6270 } else {
6271 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6272 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6273 }
6274
6275 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6276 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6277 WAIT_REG_MEM_FUNCTION(3) | /* == */
6278 reg_mem_engine));
6279 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6280 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6281 amdgpu_ring_write(ring, ref_and_mask);
6282 amdgpu_ring_write(ring, ref_and_mask);
6283 amdgpu_ring_write(ring, 0x20); /* poll interval */
6284}
6285
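/*
 * Note on the packet above: WAIT_REG_MEM with OPERATION(1) is a
 * write/wait/write sequence. It writes ref_and_mask to
 * GPU_HDP_FLUSH_REQ, then polls GPU_HDP_FLUSH_DONE at the given
 * interval (0x20) until (DONE & ref_and_mask) == ref_and_mask, i.e.
 * until the HDP flush requested for this ring's CP engine completes.
 */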
45682886
ML
6286static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6287{
6288 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6289 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6290 EVENT_INDEX(4));
6291
6292 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6293 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6294 EVENT_INDEX(0));
6295}
6296
93323131 6297static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
d88bf583 6298 struct amdgpu_ib *ib,
c4f46f22 6299 unsigned vmid, bool ctx_switch)
aaa36a97
AD
6300{
6301 u32 header, control = 0;
aaa36a97 6302
de807f81 6303 if (ib->flags & AMDGPU_IB_FLAG_CE)
aaa36a97
AD
6304 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6305 else
6306 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6307
c4f46f22 6308 control |= ib->length_dw | (vmid << 24);
aaa36a97 6309
635e7132 6310 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
2e2e3c7f
ML
6311 control |= INDIRECT_BUFFER_PRE_ENB(1);
6312
635e7132
ML
6313 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6314 gfx_v8_0_ring_emit_de_meta(ring);
6315 }
6316
aaa36a97
AD
6317 amdgpu_ring_write(ring, header);
6318 amdgpu_ring_write(ring,
6319#ifdef __BIG_ENDIAN
6320 (2 << 0) |
6321#endif
6322 (ib->gpu_addr & 0xFFFFFFFC));
6323 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6324 amdgpu_ring_write(ring, control);
6325}
6326
93323131 6327static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
d88bf583 6328 struct amdgpu_ib *ib,
c4f46f22 6329 unsigned vmid, bool ctx_switch)
93323131 6330{
c4f46f22 6331 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
93323131 6332
33b7ed01 6333 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
93323131 6334 amdgpu_ring_write(ring,
6335#ifdef __BIG_ENDIAN
62d2ce4b 6336 (2 << 0) |
93323131 6337#endif
62d2ce4b 6338 (ib->gpu_addr & 0xFFFFFFFC));
93323131 6339 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6340 amdgpu_ring_write(ring, control);
6341}
6342
aaa36a97 6343static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
890ee23f 6344 u64 seq, unsigned flags)
aaa36a97 6345{
890ee23f
CZ
6346 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6347 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6348
aaa36a97
AD
6349 /* EVENT_WRITE_EOP - flush caches, send int */
6350 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6351 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6352 EOP_TC_ACTION_EN |
f84e63f2 6353 EOP_TC_WB_ACTION_EN |
aaa36a97
AD
6354 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6355 EVENT_INDEX(5)));
6356 amdgpu_ring_write(ring, addr & 0xfffffffc);
90bea0ab 6357 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
890ee23f 6358 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
aaa36a97
AD
6359 amdgpu_ring_write(ring, lower_32_bits(seq));
6360 amdgpu_ring_write(ring, upper_32_bits(seq));
22c01cc4 6361
aaa36a97
AD
6362}
6363
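/*
 * Encoding note for the EVENT_WRITE_EOP packet above, per the usual
 * PM4 convention (an assumption, not spelled out in this file):
 * DATA_SEL = 1 writes the 32-bit seq, DATA_SEL = 2 the full 64-bit
 * seq, and INT_SEL = 2 raises the EOP interrupt only once the data
 * write has landed, which is what CPU-side fence processing relies on.
 */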
b8c7b39e 6364static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
aaa36a97 6365{
21cd942e 6366 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5907a0d8 6367 uint32_t seq = ring->fence_drv.sync_seq;
22c01cc4
AA
6368 uint64_t addr = ring->fence_drv.gpu_addr;
6369
6370 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6371 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
9cac5373
CZ
6372 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6373 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
22c01cc4
AA
6374 amdgpu_ring_write(ring, addr & 0xfffffffc);
6375 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6376 amdgpu_ring_write(ring, seq);
6377 amdgpu_ring_write(ring, 0xffffffff);
6378 amdgpu_ring_write(ring, 4); /* poll interval */
b8c7b39e
CK
6379}
6380
6381static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
c633c00b 6382 unsigned vmid, uint64_t pd_addr)
b8c7b39e 6383{
21cd942e 6384 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5c3422b0 6385
c633c00b 6386 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
aaa36a97
AD
6387
6388 /* wait for the invalidate to complete */
6389 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6390 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6391 WAIT_REG_MEM_FUNCTION(0) | /* always */
6392 WAIT_REG_MEM_ENGINE(0))); /* me */
6393 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6394 amdgpu_ring_write(ring, 0);
6395 amdgpu_ring_write(ring, 0); /* ref */
6396 amdgpu_ring_write(ring, 0); /* mask */
6397 amdgpu_ring_write(ring, 0x20); /* poll interval */
6398
6399 /* compute doesn't have PFP */
6400 if (usepfp) {
6401 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6402 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6403 amdgpu_ring_write(ring, 0x0);
aaa36a97
AD
6404 }
6405}
6406
536fbf94 6407static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
aaa36a97
AD
6408{
6409 return ring->adev->wb.wb[ring->wptr_offs];
6410}
6411
6412static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6413{
6414 struct amdgpu_device *adev = ring->adev;
6415
6416 /* XXX check if swapping is necessary on BE */
536fbf94
KW
6417 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6418 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
aaa36a97
AD
6419}
6420
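/*
 * Unlike the gfx ring above, compute rings on VI always run with
 * doorbells, so there is no MMIO fallback here; the write-back slot is
 * updated before the doorbell so the CP presumably sees a consistent
 * wptr when the doorbell fires.
 */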
b8866c26
AR
6421static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6422 bool acquire)
6423{
6424 struct amdgpu_device *adev = ring->adev;
6425 int pipe_num, tmp, reg;
6426 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6427
6428 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6429
6430 /* first me only has 2 entries, GFX and HP3D */
6431 if (ring->me > 0)
6432 pipe_num -= 2;
6433
6434 reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6435 tmp = RREG32(reg);
6436 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6437 WREG32(reg, tmp);
6438}
6439
6440static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6441 struct amdgpu_ring *ring,
6442 bool acquire)
6443{
6444 int i, pipe;
6445 bool reserve;
6446 struct amdgpu_ring *iring;
6447
6448 mutex_lock(&adev->gfx.pipe_reserve_mutex);
6449 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6450 if (acquire)
6451 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6452 else
6453 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6454
6455 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6456 /* Clear all reservations - everyone reacquires all resources */
6457 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6458 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6459 true);
6460
6461 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6462 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6463 true);
6464 } else {
6465 /* Lower all pipes without a current reservation */
6466 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6467 iring = &adev->gfx.gfx_ring[i];
6468 pipe = amdgpu_gfx_queue_to_bit(adev,
6469 iring->me,
6470 iring->pipe,
6471 0);
6472 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6473 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6474 }
6475
6476 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6477 iring = &adev->gfx.compute_ring[i];
6478 pipe = amdgpu_gfx_queue_to_bit(adev,
6479 iring->me,
6480 iring->pipe,
6481 0);
6482 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6483 gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6484 }
6485 }
6486
6487 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6488}
6489
6490static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6491 struct amdgpu_ring *ring,
6492 bool acquire)
6493{
6494 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6495 uint32_t queue_priority = acquire ? 0xf : 0x0;
6496
6497 mutex_lock(&adev->srbm_mutex);
6498 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6499
6500 WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6501 WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6502
6503 vi_srbm_select(adev, 0, 0, 0, 0);
6504 mutex_unlock(&adev->srbm_mutex);
6505}

6506static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
1b1f42d8 6507 enum drm_sched_priority priority)
b8866c26
AR
6508{
6509 struct amdgpu_device *adev = ring->adev;
1b1f42d8 6510 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
b8866c26
AR
6511
6512 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6513 return;
6514
6515 gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6516 gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6517}
6518
aaa36a97
AD
6519static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6520 u64 addr, u64 seq,
890ee23f 6521 unsigned flags)
aaa36a97 6522{
890ee23f
CZ
6523 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6524 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6525
aaa36a97
AD
6526 /* RELEASE_MEM - flush caches, send int */
6527 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6528 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6529 EOP_TC_ACTION_EN |
a3d5aaa8 6530 EOP_TC_WB_ACTION_EN |
aaa36a97
AD
6531 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6532 EVENT_INDEX(5)));
890ee23f 6533 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
aaa36a97
AD
6534 amdgpu_ring_write(ring, addr & 0xfffffffc);
6535 amdgpu_ring_write(ring, upper_32_bits(addr));
6536 amdgpu_ring_write(ring, lower_32_bits(seq));
6537 amdgpu_ring_write(ring, upper_32_bits(seq));
6538}
6539
4e638ae9
XY
6540static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6541 u64 seq, unsigned int flags)
6542{
6543 /* we only allocate 32bit for each seq wb address */
f10b478d 6544 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4e638ae9
XY
6545
6546 /* write fence seq to the "addr" */
6547 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6548 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6549 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6550 amdgpu_ring_write(ring, lower_32_bits(addr));
6551 amdgpu_ring_write(ring, upper_32_bits(addr));
6552 amdgpu_ring_write(ring, lower_32_bits(seq));
6553
6554 if (flags & AMDGPU_FENCE_FLAG_INT) {
6555 /* set register to trigger INT */
6556 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6557 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6558 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6559 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6560 amdgpu_ring_write(ring, 0);
6561 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6562 }
6563}
6564
c2167a65
ML
6565static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6566{
6567 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6568 amdgpu_ring_write(ring, 0);
6569}
6570
753ad49c
ML
6571static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6572{
6573 uint32_t dw2 = 0;
6574
c2ce92fc 6575 if (amdgpu_sriov_vf(ring->adev))
95243543 6576 gfx_v8_0_ring_emit_ce_meta(ring);
c2ce92fc 6577
753ad49c
ML
6578 dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
6579 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
45682886 6580 gfx_v8_0_ring_emit_vgt_flush(ring);
753ad49c
ML
6581 /* set load_global_config & load_global_uconfig */
6582 dw2 |= 0x8001;
6583 /* set load_cs_sh_regs */
6584 dw2 |= 0x01000000;
6585 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6586 dw2 |= 0x10002;
6587
6588 /* set load_ce_ram if a preamble is present */
6589 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6590 dw2 |= 0x10000000;
6591 } else {
6592 /* still load_ce_ram if this is the first time the preamble is presented,
6593 * even though no context switch happens.
6594 */
6595 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6596 dw2 |= 0x10000000;
6597 }
6598
6599 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6600 amdgpu_ring_write(ring, dw2);
6601 amdgpu_ring_write(ring, 0);
6602}
6603
806ba2d4
ML
6604static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6605{
6606 unsigned ret;
6607
6608 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6609 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6610 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6611 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6612 ret = ring->wptr & ring->buf_mask;
6613 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6614 return ret;
6615}
6616
6617static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6618{
6619 unsigned cur;
6620
6621 BUG_ON(offset > ring->buf_mask);
6622 BUG_ON(ring->ring[offset] != 0x55aa55aa);
6623
6624 cur = (ring->wptr & ring->buf_mask) - 1;
6625 if (likely(cur > offset))
6626 ring->ring[offset] = cur - offset;
6627 else
6628 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6629}
6630
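/*
 * Worked example for the wrap-around branch above: with a 4KB ring
 * (1024 dwords, buf_mask = 1023), offset = 1020 and a wrapped wptr of 5
 * gives cur = 4, so the else branch stores 1024 - 1020 + 4 = 8, which
 * is simply (cur - offset) taken modulo the ring size in dwords.
 */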
880e87e3
XY
6631static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6632{
6633 struct amdgpu_device *adev = ring->adev;
6634
6635 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6636 amdgpu_ring_write(ring, 0 | /* src: register*/
6637 (5 << 8) | /* dst: memory */
6638 (1 << 20)); /* write confirm */
6639 amdgpu_ring_write(ring, reg);
6640 amdgpu_ring_write(ring, 0);
6641 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6642 adev->virt.reg_val_offs * 4));
6643 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6644 adev->virt.reg_val_offs * 4));
6645}
6646
6647static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6648 uint32_t val)
6649{
9ed88047
CK
6650 uint32_t cmd;
6651
6652 switch (ring->funcs->type) {
6653 case AMDGPU_RING_TYPE_GFX:
6654 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6655 break;
6656 case AMDGPU_RING_TYPE_KIQ:
6657 cmd = 1 << 16; /* no inc addr */
6658 break;
6659 default:
6660 cmd = WR_CONFIRM;
6661 break;
6662 }
6663
880e87e3 6664 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
9ed88047 6665 amdgpu_ring_write(ring, cmd);
880e87e3
XY
6666 amdgpu_ring_write(ring, reg);
6667 amdgpu_ring_write(ring, 0);
6668 amdgpu_ring_write(ring, val);
6669}
6670
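/*
 * The engine selection above matters: gfx rings write through the PFP
 * (ENGINE_SEL(1)) so the write stays ordered with PFP-fetched IBs, the
 * KIQ uses the no-increment-address encoding (1 << 16), and everything
 * else falls back to a plain confirmed write from the ME.
 */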
aaa36a97
AD
6671static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6672 enum amdgpu_interrupt_state state)
6673{
61cb8cef
TSD
6674 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6675 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6676}
6677
6678static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6679 int me, int pipe,
6680 enum amdgpu_interrupt_state state)
6681{
d0c55cdf 6682 u32 mec_int_cntl, mec_int_cntl_reg;
aaa36a97 6683
aaa36a97 6684 /*
d0c55cdf
AD
6685 * amdgpu controls only the first MEC. That's why this function only
6686 * handles the setting of interrupts for this specific MEC. All other
aaa36a97
AD
6687 * pipes' interrupts are set by amdkfd.
6688 */
6689
6690 if (me == 1) {
6691 switch (pipe) {
6692 case 0:
d0c55cdf
AD
6693 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6694 break;
6695 case 1:
6696 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6697 break;
6698 case 2:
6699 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6700 break;
6701 case 3:
6702 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
aaa36a97
AD
6703 break;
6704 default:
6705 DRM_DEBUG("invalid pipe %d\n", pipe);
6706 return;
6707 }
6708 } else {
6709 DRM_DEBUG("invalid me %d\n", me);
6710 return;
6711 }
6712
d0c55cdf
AD
6713 switch (state) {
6714 case AMDGPU_IRQ_STATE_DISABLE:
6715 mec_int_cntl = RREG32(mec_int_cntl_reg);
6716 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6717 WREG32(mec_int_cntl_reg, mec_int_cntl);
6718 break;
6719 case AMDGPU_IRQ_STATE_ENABLE:
6720 mec_int_cntl = RREG32(mec_int_cntl_reg);
6721 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6722 WREG32(mec_int_cntl_reg, mec_int_cntl);
6723 break;
6724 default:
6725 break;
6726 }
aaa36a97
AD
6727}
6728
6729static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6730 struct amdgpu_irq_src *source,
6731 unsigned type,
6732 enum amdgpu_interrupt_state state)
6733{
61cb8cef
TSD
6734 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6735 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6736
6737 return 0;
6738}
6739
6740static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6741 struct amdgpu_irq_src *source,
6742 unsigned type,
6743 enum amdgpu_interrupt_state state)
6744{
61cb8cef
TSD
6745 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6746 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
aaa36a97
AD
6747
6748 return 0;
6749}
6750
6751static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6752 struct amdgpu_irq_src *src,
6753 unsigned type,
6754 enum amdgpu_interrupt_state state)
6755{
6756 switch (type) {
6757 case AMDGPU_CP_IRQ_GFX_EOP:
6758 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6759 break;
6760 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6761 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6762 break;
6763 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6764 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6765 break;
6766 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6767 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6768 break;
6769 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6770 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6771 break;
6772 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6773 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6774 break;
6775 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6776 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6777 break;
6778 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6779 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6780 break;
6781 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6782 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6783 break;
6784 default:
6785 break;
6786 }
6787 return 0;
6788}
6789
6790static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6791 struct amdgpu_irq_src *source,
6792 struct amdgpu_iv_entry *entry)
6793{
6794 int i;
6795 u8 me_id, pipe_id, queue_id;
6796 struct amdgpu_ring *ring;
6797
6798 DRM_DEBUG("IH: CP EOP\n");
6799 me_id = (entry->ring_id & 0x0c) >> 2;
6800 pipe_id = (entry->ring_id & 0x03) >> 0;
6801 queue_id = (entry->ring_id & 0x70) >> 4;
6802
6803 switch (me_id) {
6804 case 0:
6805 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6806 break;
6807 case 1:
6808 case 2:
6809 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6810 ring = &adev->gfx.compute_ring[i];
6811 /* Per-queue interrupts are supported for MEC starting from VI.
6812 * The interrupt can only be enabled/disabled per pipe, not per queue.
6813 */
6814 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6815 amdgpu_fence_process(ring);
6816 }
6817 break;
6818 }
6819 return 0;
6820}
6821
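/*
 * Worked example of the ring_id decode above: ring_id = 0x25 splits
 * into me_id = (0x25 & 0x0c) >> 2 = 1, pipe_id = 0x25 & 0x03 = 1 and
 * queue_id = (0x25 & 0x70) >> 4 = 2, so the fence of the compute ring
 * sitting on MEC1/pipe1/queue2 is the one processed.
 */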
6822static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6823 struct amdgpu_irq_src *source,
6824 struct amdgpu_iv_entry *entry)
6825{
6826 DRM_ERROR("Illegal register access in command stream\n");
6827 schedule_work(&adev->reset_work);
6828 return 0;
6829}
6830
6831static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6832 struct amdgpu_irq_src *source,
6833 struct amdgpu_iv_entry *entry)
6834{
6835 DRM_ERROR("Illegal instruction in command stream\n");
6836 schedule_work(&adev->reset_work);
6837 return 0;
6838}
6839
4e638ae9
XY
6840static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6841 struct amdgpu_irq_src *src,
6842 unsigned int type,
6843 enum amdgpu_interrupt_state state)
6844{
07c397f9 6845 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
4e638ae9 6846
4e638ae9
XY
6847 switch (type) {
6848 case AMDGPU_CP_KIQ_IRQ_DRIVER0:
ccaf3574
TSD
6849 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
6850 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6851 if (ring->me == 1)
6852 WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
6853 ring->pipe,
6854 GENERIC2_INT_ENABLE,
6855 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6856 else
6857 WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
6858 ring->pipe,
6859 GENERIC2_INT_ENABLE,
6860 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
4e638ae9
XY
6861 break;
6862 default:
6863 BUG(); /* kiq only supports GENERIC2_INT now */
6864 break;
6865 }
6866 return 0;
6867}
6868
6869static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6870 struct amdgpu_irq_src *source,
6871 struct amdgpu_iv_entry *entry)
6872{
6873 u8 me_id, pipe_id, queue_id;
07c397f9 6874 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
4e638ae9 6875
4e638ae9
XY
6876 me_id = (entry->ring_id & 0x0c) >> 2;
6877 pipe_id = (entry->ring_id & 0x03) >> 0;
6878 queue_id = (entry->ring_id & 0x70) >> 4;
6879 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6880 me_id, pipe_id, queue_id);
6881
6882 amdgpu_fence_process(ring);
6883 return 0;
6884}
6885
a1255107 6886static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
88a907d6 6887 .name = "gfx_v8_0",
aaa36a97 6888 .early_init = gfx_v8_0_early_init,
ccba7691 6889 .late_init = gfx_v8_0_late_init,
aaa36a97
AD
6890 .sw_init = gfx_v8_0_sw_init,
6891 .sw_fini = gfx_v8_0_sw_fini,
6892 .hw_init = gfx_v8_0_hw_init,
6893 .hw_fini = gfx_v8_0_hw_fini,
6894 .suspend = gfx_v8_0_suspend,
6895 .resume = gfx_v8_0_resume,
6896 .is_idle = gfx_v8_0_is_idle,
6897 .wait_for_idle = gfx_v8_0_wait_for_idle,
3d7c6384 6898 .check_soft_reset = gfx_v8_0_check_soft_reset,
1057f20c 6899 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
aaa36a97 6900 .soft_reset = gfx_v8_0_soft_reset,
e4ae0fc3 6901 .post_soft_reset = gfx_v8_0_post_soft_reset,
aaa36a97
AD
6902 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6903 .set_powergating_state = gfx_v8_0_set_powergating_state,
ebd843d6 6904 .get_clockgating_state = gfx_v8_0_get_clockgating_state,
aaa36a97
AD
6905};
6906
6907static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
21cd942e 6908 .type = AMDGPU_RING_TYPE_GFX,
79887142
CK
6909 .align_mask = 0xff,
6910 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
536fbf94 6911 .support_64bit_ptrs = false,
e7706b42 6912 .get_rptr = gfx_v8_0_ring_get_rptr,
aaa36a97
AD
6913 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6914 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
e9d672b2
ML
6915 .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6916 5 + /* COND_EXEC */
6917 7 + /* PIPELINE_SYNC */
5518625d 6918 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
e9d672b2
ML
6919 8 + /* FENCE for VM_FLUSH */
6920 20 + /* GDS switch */
6921 4 + /* double SWITCH_BUFFER,
6922 the first COND_EXEC jump to the place just
6923 prior to this double SWITCH_BUFFER */
6924 5 + /* COND_EXEC */
6925 7 + /* HDP_flush */
6926 4 + /* VGT_flush */
6927 14 + /* CE_META */
6928 31 + /* DE_META */
6929 3 + /* CNTX_CTRL */
6930 5 + /* HDP_INVL */
6931 8 + 8 + /* FENCE x2 */
6932 2, /* SWITCH_BUFFER */
e12f3d7a 6933 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
93323131 6934 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
aaa36a97 6935 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
b8c7b39e 6936 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
aaa36a97
AD
6937 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6938 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
d2edb07b 6939 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
aaa36a97
AD
6940 .test_ring = gfx_v8_0_ring_test_ring,
6941 .test_ib = gfx_v8_0_ring_test_ib,
edff0e28 6942 .insert_nop = amdgpu_ring_insert_nop,
9e5d5309 6943 .pad_ib = amdgpu_ring_generic_pad_ib,
c2167a65 6944 .emit_switch_buffer = gfx_v8_ring_emit_sb,
753ad49c 6945 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
806ba2d4
ML
6946 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6947 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
9ed88047 6948 .emit_wreg = gfx_v8_0_ring_emit_wreg,
aaa36a97
AD
6949};
6950
6951static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
21cd942e 6952 .type = AMDGPU_RING_TYPE_COMPUTE,
79887142
CK
6953 .align_mask = 0xff,
6954 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
536fbf94 6955 .support_64bit_ptrs = false,
e7706b42 6956 .get_rptr = gfx_v8_0_ring_get_rptr,
aaa36a97
AD
6957 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6958 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
e12f3d7a
CK
6959 .emit_frame_size =
6960 20 + /* gfx_v8_0_ring_emit_gds_switch */
6961 7 + /* gfx_v8_0_ring_emit_hdp_flush */
2ee150cd 6962 5 + /* hdp_invalidate */
e12f3d7a 6963 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
5518625d 6964 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
e12f3d7a
CK
6965 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6966 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
93323131 6967 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
aaa36a97 6968 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
b8c7b39e 6969 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
aaa36a97
AD
6970 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6971 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
35074d2d 6972 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
aaa36a97
AD
6973 .test_ring = gfx_v8_0_ring_test_ring,
6974 .test_ib = gfx_v8_0_ring_test_ib,
edff0e28 6975 .insert_nop = amdgpu_ring_insert_nop,
9e5d5309 6976 .pad_ib = amdgpu_ring_generic_pad_ib,
b8866c26 6977 .set_priority = gfx_v8_0_ring_set_priority_compute,
9ed88047 6978 .emit_wreg = gfx_v8_0_ring_emit_wreg,
aaa36a97
AD
6979};
6980
4e638ae9
XY
6981static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6982 .type = AMDGPU_RING_TYPE_KIQ,
6983 .align_mask = 0xff,
6984 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
536fbf94 6985 .support_64bit_ptrs = false,
4e638ae9
XY
6986 .get_rptr = gfx_v8_0_ring_get_rptr,
6987 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6988 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6989 .emit_frame_size =
6990 20 + /* gfx_v8_0_ring_emit_gds_switch */
6991 7 + /* gfx_v8_0_ring_emit_hdp_flush */
2ee150cd 6992 5 + /* hdp_invalidate */
4e638ae9
XY
6993 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6994 17 + /* gfx_v8_0_ring_emit_vm_flush */
6995 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6996 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6997 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6998 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
4e638ae9
XY
6999 .test_ring = gfx_v8_0_ring_test_ring,
7000 .test_ib = gfx_v8_0_ring_test_ib,
7001 .insert_nop = amdgpu_ring_insert_nop,
7002 .pad_ib = amdgpu_ring_generic_pad_ib,
880e87e3
XY
7003 .emit_rreg = gfx_v8_0_ring_emit_rreg,
7004 .emit_wreg = gfx_v8_0_ring_emit_wreg,
4e638ae9
XY
7005};
7006
aaa36a97
AD
7007static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7008{
7009 int i;
7010
4e638ae9
XY
7011 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7012
aaa36a97
AD
7013 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7014 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7015
7016 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7017 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7018}
7019
7020static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7021 .set = gfx_v8_0_set_eop_interrupt_state,
7022 .process = gfx_v8_0_eop_irq,
7023};
7024
7025static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7026 .set = gfx_v8_0_set_priv_reg_fault_state,
7027 .process = gfx_v8_0_priv_reg_irq,
7028};
7029
7030static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7031 .set = gfx_v8_0_set_priv_inst_fault_state,
7032 .process = gfx_v8_0_priv_inst_irq,
7033};
7034
4e638ae9
XY
7035static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7036 .set = gfx_v8_0_kiq_set_interrupt_state,
7037 .process = gfx_v8_0_kiq_irq,
7038};
7039
aaa36a97
AD
7040static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7041{
7042 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7043 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7044
7045 adev->gfx.priv_reg_irq.num_types = 1;
7046 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7047
7048 adev->gfx.priv_inst_irq.num_types = 1;
7049 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
4e638ae9
XY
7050
7051 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7052 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
aaa36a97
AD
7053}
7054
dbff57bc
AD
7055static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7056{
ae6a58e4 7057 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
dbff57bc
AD
7058}
7059
aaa36a97
AD
7060static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7061{
7062 /* init asic gds info */
7063 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7064 adev->gds.gws.total_size = 64;
7065 adev->gds.oa.total_size = 16;
7066
7067 if (adev->gds.mem.total_size == 64 * 1024) {
7068 adev->gds.mem.gfx_partition_size = 4096;
7069 adev->gds.mem.cs_partition_size = 4096;
7070
7071 adev->gds.gws.gfx_partition_size = 4;
7072 adev->gds.gws.cs_partition_size = 4;
7073
7074 adev->gds.oa.gfx_partition_size = 4;
7075 adev->gds.oa.cs_partition_size = 1;
7076 } else {
7077 adev->gds.mem.gfx_partition_size = 1024;
7078 adev->gds.mem.cs_partition_size = 1024;
7079
7080 adev->gds.gws.gfx_partition_size = 16;
7081 adev->gds.gws.cs_partition_size = 16;
7082
7083 adev->gds.oa.gfx_partition_size = 4;
7084 adev->gds.oa.cs_partition_size = 4;
7085 }
7086}
7087
9de06de8
NH
7088static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7089 u32 bitmap)
7090{
7091 u32 data;
7092
7093 if (!bitmap)
7094 return;
7095
7096 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7097 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7098
7099 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7100}
7101
8f8e00c1 7102static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
aaa36a97 7103{
8f8e00c1 7104 u32 data, mask;
aaa36a97 7105
5003f278
TSD
7106 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7107 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
aaa36a97 7108
378506a7 7109 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
aaa36a97 7110
5003f278 7111 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
aaa36a97
AD
7112}
7113
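/*
 * Example of the mask arithmetic above: with max_cu_per_sh = 8,
 * amdgpu_gfx_create_bitmask() yields 0xff; if the fuses report
 * INACTIVE_CUS = 0x03, the function returns ~0x03 & 0xff = 0xfc,
 * i.e. CUs 2..7 active in the currently selected SE/SH.
 */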
7dae69a2 7114static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
aaa36a97
AD
7115{
7116 int i, j, k, counter, active_cu_number = 0;
7117 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7dae69a2 7118 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
9de06de8 7119 unsigned disable_masks[4 * 2];
fe723cd3 7120 u32 ao_cu_num;
aaa36a97 7121
6157bd7a
FC
7122 memset(cu_info, 0, sizeof(*cu_info));
7123
fe723cd3
RZ
7124 if (adev->flags & AMD_IS_APU)
7125 ao_cu_num = 2;
7126 else
7127 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7128
9de06de8
NH
7129 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7130
aaa36a97
AD
7131 mutex_lock(&adev->grbm_idx_mutex);
7132 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7133 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7134 mask = 1;
7135 ao_bitmap = 0;
7136 counter = 0;
9559ef5b 7137 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
9de06de8
NH
7138 if (i < 4 && j < 2)
7139 gfx_v8_0_set_user_cu_inactive_bitmap(
7140 adev, disable_masks[i * 2 + j]);
8f8e00c1 7141 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
aaa36a97
AD
7142 cu_info->bitmap[i][j] = bitmap;
7143
fe723cd3 7144 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
aaa36a97 7145 if (bitmap & mask) {
fe723cd3 7146 if (counter < ao_cu_num)
aaa36a97
AD
7147 ao_bitmap |= mask;
7148 counter++;
7149 }
7150 mask <<= 1;
7151 }
7152 active_cu_number += counter;
dbfe85ea
FC
7153 if (i < 2 && j < 2)
7154 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7155 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
aaa36a97
AD
7156 }
7157 }
9559ef5b 7158 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
8f8e00c1 7159 mutex_unlock(&adev->grbm_idx_mutex);
aaa36a97
AD
7160
7161 cu_info->number = active_cu_number;
7162 cu_info->ao_cu_mask = ao_cu_mask;
ebdebf42
FC
7163 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7164 cu_info->max_waves_per_simd = 10;
7165 cu_info->max_scratch_slots_per_cu = 32;
7166 cu_info->wave_front_size = 64;
7167 cu_info->lds_size = 64;
aaa36a97 7168}
a1255107
AD
7169
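/*
 * Note on the ao_cu_mask packing in gfx_v8_0_get_cu_info() above: each
 * (SE, SH) pair owns one byte at bit offset i * 16 + j * 8, so the
 * always-on bitmap of SE0/SH1 lands in bits 8..15 and that of SE1/SH0
 * in bits 16..23, which is why only i < 2 && j < 2 fit in the 32-bit
 * mask.
 */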
7170const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7171{
7172 .type = AMD_IP_BLOCK_TYPE_GFX,
7173 .major = 8,
7174 .minor = 0,
7175 .rev = 0,
7176 .funcs = &gfx_v8_0_ip_funcs,
7177};
7178
7179const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7180{
7181 .type = AMD_IP_BLOCK_TYPE_GFX,
7182 .major = 8,
7183 .minor = 1,
7184 .rev = 0,
7185 .funcs = &gfx_v8_0_ip_funcs,
7186};
acad2b2a 7187
95243543 7188static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
acad2b2a
ML
7189{
7190 uint64_t ce_payload_addr;
7191 int cnt_ce;
d81a2209 7192 union {
49abb980
XY
7193 struct vi_ce_ib_state regular;
7194 struct vi_ce_ib_state_chained_ib chained;
e8411302 7195 } ce_payload = {};
acad2b2a
ML
7196
7197 if (ring->adev->virt.chained_ib_support) {
6f05c4e9 7198 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
97745f68 7199 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
acad2b2a
ML
7200 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7201 } else {
6f05c4e9 7202 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
97745f68 7203 offsetof(struct vi_gfx_meta_data, ce_payload);
acad2b2a
ML
7204 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7205 }
7206
7207 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7208 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7209 WRITE_DATA_DST_SEL(8) |
7210 WR_CONFIRM) |
7211 WRITE_DATA_CACHE_POLICY(0));
7212 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7213 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7214 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7215}
7216
95243543 7217static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
acad2b2a 7218{
95243543 7219 uint64_t de_payload_addr, gds_addr, csa_addr;
acad2b2a 7220 int cnt_de;
d81a2209 7221 union {
49abb980
XY
7222 struct vi_de_ib_state regular;
7223 struct vi_de_ib_state_chained_ib chained;
e8411302 7224 } de_payload = {};
acad2b2a 7225
6f05c4e9 7226 csa_addr = amdgpu_csa_vaddr(ring->adev);
acad2b2a
ML
7227 gds_addr = csa_addr + 4096;
7228 if (ring->adev->virt.chained_ib_support) {
7229 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7230 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
49abb980 7231 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
acad2b2a
ML
7232 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7233 } else {
7234 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7235 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
49abb980 7236 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
acad2b2a
ML
7237 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7238 }
7239
7240 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7241 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7242 WRITE_DATA_DST_SEL(8) |
7243 WR_CONFIRM) |
7244 WRITE_DATA_CACHE_POLICY(0));
7245 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7246 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7247 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7248}