drm/amdgpu/gfx8: reduce the function params for mqd setup
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

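/* helpers that shift a field value into its position within the
 * GB_TILE_MODE0/GB_MACROTILE_MODE0 tiling-mode registers
 */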
#define ARRAY_MODE(x)		((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)		((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)		((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)		((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)		((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)		((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)		((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength 14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

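/* The "golden" register tables below are triples of (register offset,
 * AND mask, OR value) consumed by amdgpu_program_register_sequence():
 * the bits selected by the mask are replaced with the given value, and
 * a mask of 0xffffffff writes the value outright.
 */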
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);

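/* apply the per-ASIC "golden" register settings selected by asic_type */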
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
			iceland_mgcg_cgcg_init,
			(const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
			golden_settings_iceland_a11,
			(const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
			iceland_golden_common_all,
			(const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
			fiji_mgcg_cgcg_init,
			(const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
			golden_settings_fiji_a10,
			(const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
			fiji_golden_common_all,
			(const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
			tonga_mgcg_cgcg_init,
			(const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
			golden_settings_tonga_a11,
			(const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
			tonga_golden_common_all,
			(const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
			golden_settings_polaris11_a11,
			(const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
			polaris11_golden_common_all,
			(const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
			golden_settings_polaris10_a11,
			(const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
			polaris10_golden_common_all,
			(const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
			cz_mgcg_cgcg_init,
			(const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
			cz_golden_settings_a11,
			(const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
			cz_golden_common_all,
			(const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
			stoney_mgcg_cgcg_init,
			(const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
			stoney_golden_settings_a11,
			(const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
			stoney_golden_common_all,
			(const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

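/* basic ring liveness test: seed a scratch register with 0xCAFEDEAD,
 * emit a SET_UCONFIG_REG packet through the ring that writes 0xDEADBEEF
 * to it, then poll the register until the value lands or we time out
 */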
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

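/* same idea as the ring test, but the scratch write goes through an
 * indirect buffer (IB) and completion is detected by waiting on the
 * IB's fence instead of polling
 */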
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

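/* fetch and validate the PFP/ME/CE/RLC/MEC (and, where present, MEC2)
 * images, named "amdgpu/<chip>_<block>.bin", and cache the version and
 * feature fields from their headers
 */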
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);

	/* chained IB ucode hasn't been formally released, so disable it for now.
	 * TODO: once the ucode is ready, use the ucode version to judge
	 * whether chained IBs are supported.
	 */
	adev->virt.chained_ib_support = false;

	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
		le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
		le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
		le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
		le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
		le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.smu_load) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the JT (jump table) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

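/* build the clear-state buffer: a PM4 packet stream that loads the
 * default context-register values from the cs_data tables, bracketed by
 * PREAMBLE begin/end packets and finished with a CLEAR_STATE packet
 */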
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

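/* copy the jump table of each CP microcode image (CE, PFP, ME, MEC and,
 * on Carrizo, MEC2) back to back into the RLC cp_table buffer object
 */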
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}

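/* allocate, pin and fill the clear-state buffer object via
 * gfx_v8_0_get_csb_buffer(); on Carrizo/Stoney also set up the CP
 * jump-table buffer object
 */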
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}

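/* set up the KIQ (kernel interface queue) as a doorbell-backed compute
 * ring: MEC2 pipe 0 when MEC2 firmware is available, else MEC1 pipe 1
 */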
4e638ae9
XY
1376static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
1377 struct amdgpu_ring *ring,
1378 struct amdgpu_irq_src *irq)
1379{
1380 int r = 0;
1381
bffa2280
ML
1382 r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
1383 if (r)
1384 return r;
880e87e3 1385
4e638ae9
XY
1386 ring->adev = NULL;
1387 ring->ring_obj = NULL;
1388 ring->use_doorbell = true;
1389 ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
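	/* run the KIQ on the second MEC when its firmware is loaded;
	 * otherwise fall back to MEC1 pipe 1, which keeps it off pipe 0,
	 * the pipe used by the regular compute rings set up in sw_init
	 */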
1390 if (adev->gfx.mec2_fw) {
1391 ring->me = 2;
1392 ring->pipe = 0;
1393 } else {
1394 ring->me = 1;
1395 ring->pipe = 1;
1396 }
1397
1398 ring->queue = 0;
1399 sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
1400 r = amdgpu_ring_init(adev, ring, 1024,
1401 irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
1402 if (r)
1403 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
1404
1405 return r;
1406}
1407static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
1408 struct amdgpu_irq_src *irq)
1409{
1410 	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
1411 	amdgpu_ring_fini(ring);
1412}
1413
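/* per-queue HPD EOP buffer size in bytes; the single hpd_eop_obj allocated
 * below is num_queue * MEC_HPD_SIZE and is zero-initialized before use
 */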
1414#define MEC_HPD_SIZE 2048
1415
1416static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1417{
1418 int r;
1419 u32 *hpd;
1420
1421 /*
1422 * we assign only 1 pipe because all other pipes will
1423 * be handled by KFD
1424 */
1425 adev->gfx.mec.num_mec = 1;
1426 adev->gfx.mec.num_pipe = 1;
1427 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1428
1429 if (adev->gfx.mec.hpd_eop_obj == NULL) {
1430 r = amdgpu_bo_create(adev,
1431 				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1432 				     PAGE_SIZE, true,
1433 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1434 &adev->gfx.mec.hpd_eop_obj);
1435 if (r) {
1436 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1437 return r;
1438 }
1439 }
1440
1441 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1442 if (unlikely(r != 0)) {
1443 gfx_v8_0_mec_fini(adev);
1444 return r;
1445 }
1446 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1447 &adev->gfx.mec.hpd_eop_gpu_addr);
1448 if (r) {
1449 		dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
1450 gfx_v8_0_mec_fini(adev);
1451 return r;
1452 }
1453 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1454 if (r) {
1455 		dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
1456 gfx_v8_0_mec_fini(adev);
1457 return r;
1458 }
1459
1460 	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1461
1462 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1463 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1464
1465 return 0;
1466}
1467
1468static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
1469{
1470 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1471
1472 amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
1473}
1474
1475static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
1476{
1477 int r;
1478 u32 *hpd;
1479 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1480
1481 r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
1482 AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
1483 &kiq->eop_gpu_addr, (void **)&hpd);
1484 if (r) {
1485 dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
1486 return r;
1487 }
1488
1489 memset(hpd, 0, MEC_HPD_SIZE);
1490
1491 r = amdgpu_bo_reserve(kiq->eop_obj, false);
1492 if (unlikely(r != 0))
1493 dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
1494 	amdgpu_bo_kunmap(kiq->eop_obj);
1495 	amdgpu_bo_unreserve(kiq->eop_obj);
1496
1497 return 0;
1498}
1499
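/* hand-assembled GCN3 machine code for the Carrizo EDC workaround below:
 * the first kernel steps v_mov_b32 writes across the VGPR file and the
 * second steps scalar moves across the SGPR file, so every GPR ends up
 * holding a known value; both appear to finish with s_barrier (0xbf8a0000)
 * and s_endpgm (0xbf810000)
 */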
1500static const u32 vgpr_init_compute_shader[] =
1501{
1502 0x7e000209, 0x7e020208,
1503 0x7e040207, 0x7e060206,
1504 0x7e080205, 0x7e0a0204,
1505 0x7e0c0203, 0x7e0e0202,
1506 0x7e100201, 0x7e120200,
1507 0x7e140209, 0x7e160208,
1508 0x7e180207, 0x7e1a0206,
1509 0x7e1c0205, 0x7e1e0204,
1510 0x7e200203, 0x7e220202,
1511 0x7e240201, 0x7e260200,
1512 0x7e280209, 0x7e2a0208,
1513 0x7e2c0207, 0x7e2e0206,
1514 0x7e300205, 0x7e320204,
1515 0x7e340203, 0x7e360202,
1516 0x7e380201, 0x7e3a0200,
1517 0x7e3c0209, 0x7e3e0208,
1518 0x7e400207, 0x7e420206,
1519 0x7e440205, 0x7e460204,
1520 0x7e480203, 0x7e4a0202,
1521 0x7e4c0201, 0x7e4e0200,
1522 0x7e500209, 0x7e520208,
1523 0x7e540207, 0x7e560206,
1524 0x7e580205, 0x7e5a0204,
1525 0x7e5c0203, 0x7e5e0202,
1526 0x7e600201, 0x7e620200,
1527 0x7e640209, 0x7e660208,
1528 0x7e680207, 0x7e6a0206,
1529 0x7e6c0205, 0x7e6e0204,
1530 0x7e700203, 0x7e720202,
1531 0x7e740201, 0x7e760200,
1532 0x7e780209, 0x7e7a0208,
1533 0x7e7c0207, 0x7e7e0206,
1534 0xbf8a0000, 0xbf810000,
1535};
1536
1537static const u32 sgpr_init_compute_shader[] =
1538{
1539 0xbe8a0100, 0xbe8c0102,
1540 0xbe8e0104, 0xbe900106,
1541 0xbe920108, 0xbe940100,
1542 0xbe960102, 0xbe980104,
1543 0xbe9a0106, 0xbe9c0108,
1544 0xbe9e0100, 0xbea00102,
1545 0xbea20104, 0xbea40106,
1546 0xbea60108, 0xbea80100,
1547 0xbeaa0102, 0xbeac0104,
1548 0xbeae0106, 0xbeb00108,
1549 0xbeb20100, 0xbeb40102,
1550 0xbeb60104, 0xbeb80106,
1551 0xbeba0108, 0xbebc0100,
1552 0xbebe0102, 0xbec00104,
1553 0xbec20106, 0xbec40108,
1554 0xbec60100, 0xbec80102,
1555 0xbee60004, 0xbee70005,
1556 0xbeea0006, 0xbeeb0007,
1557 0xbee80008, 0xbee90009,
1558 0xbefc0000, 0xbf8a0000,
1559 0xbf810000, 0x00000000,
1560};
1561
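/* register/value pairs emitted via PACKET3_SET_SH_REG ahead of each test
 * dispatch: the VGPR pass launches 1024-thread (256*4) groups across all
 * CUs, while the two SGPR passes launch 1280-thread (256*5) groups and
 * each cover half of the CUs (COMPUTE_STATIC_THREAD_MGMT_SE0 masks 0x0f
 * and 0xf0)
 */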
1562static const u32 vgpr_init_regs[] =
1563{
1564 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1565 mmCOMPUTE_RESOURCE_LIMITS, 0,
1566 mmCOMPUTE_NUM_THREAD_X, 256*4,
1567 mmCOMPUTE_NUM_THREAD_Y, 1,
1568 mmCOMPUTE_NUM_THREAD_Z, 1,
1569 mmCOMPUTE_PGM_RSRC2, 20,
1570 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1571 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1572 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1573 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1574 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1575 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1576 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1577 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1578 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1579 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1580};
1581
1582static const u32 sgpr1_init_regs[] =
1583{
1584 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1585 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1586 mmCOMPUTE_NUM_THREAD_X, 256*5,
1587 mmCOMPUTE_NUM_THREAD_Y, 1,
1588 mmCOMPUTE_NUM_THREAD_Z, 1,
1589 mmCOMPUTE_PGM_RSRC2, 20,
1590 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1591 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1592 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1593 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1594 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1595 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1596 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1597 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1598 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1599 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1600};
1601
1602static const u32 sgpr2_init_regs[] =
1603{
1604 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1605 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1606 mmCOMPUTE_NUM_THREAD_X, 256*5,
1607 mmCOMPUTE_NUM_THREAD_Y, 1,
1608 mmCOMPUTE_NUM_THREAD_Z, 1,
1609 mmCOMPUTE_PGM_RSRC2, 20,
1610 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1611 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1612 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1613 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1614 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1615 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1616 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1617 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1618 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1619 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1620};
1621
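/* EDC single/double error counter registers across the GC blocks; the
 * workaround below reads them back once the shaders retire, which clears
 * the counts
 */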
1622static const u32 sec_ded_counter_registers[] =
1623{
1624 mmCPC_EDC_ATC_CNT,
1625 mmCPC_EDC_SCRATCH_CNT,
1626 mmCPC_EDC_UCODE_CNT,
1627 mmCPF_EDC_ATC_CNT,
1628 mmCPF_EDC_ROQ_CNT,
1629 mmCPF_EDC_TAG_CNT,
1630 mmCPG_EDC_ATC_CNT,
1631 mmCPG_EDC_DMA_CNT,
1632 mmCPG_EDC_TAG_CNT,
1633 mmDC_EDC_CSINVOC_CNT,
1634 mmDC_EDC_RESTORE_CNT,
1635 mmDC_EDC_STATE_CNT,
1636 mmGDS_EDC_CNT,
1637 mmGDS_EDC_GRBM_CNT,
1638 mmGDS_EDC_OA_DED,
1639 mmSPI_EDC_CNT,
1640 mmSQC_ATC_EDC_GATCL1_CNT,
1641 mmSQC_EDC_CNT,
1642 mmSQ_EDC_DED_CNT,
1643 mmSQ_EDC_INFO,
1644 mmSQ_EDC_SEC_CNT,
1645 mmTCC_EDC_CNT,
1646 mmTCP_ATC_EDC_GATCL1_CNT,
1647 mmTCP_EDC_CNT,
1648 mmTD_EDC_CNT
1649};
1650
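/* dispatch the init shaders above so every VGPR and SGPR holds a known
 * value, then enable EDC via GB_EDC_MODE/CC_GC_EDC_CONFIG and clear the
 * SEC/DED counters; only supported (and only run) on Carrizo
 */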
1651static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1652{
1653 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1654 struct amdgpu_ib ib;
1655 	struct dma_fence *f = NULL;
1656 int r, i;
1657 u32 tmp;
1658 unsigned total_size, vgpr_offset, sgpr_offset;
1659 u64 gpu_addr;
1660
1661 /* only supported on CZ */
1662 if (adev->asic_type != CHIP_CARRIZO)
1663 return 0;
1664
1665 /* bail if the compute ring is not ready */
1666 if (!ring->ready)
1667 return 0;
1668
1669 tmp = RREG32(mmGB_EDC_MODE);
1670 WREG32(mmGB_EDC_MODE, 0);
1671
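	/* IB footprint per pass: 3 dwords for each SET_SH_REG reg/value
	 * pair, 4 for the COMPUTE_PGM_LO/HI write, 5 for the
	 * DISPATCH_DIRECT packet and 2 for the CS partial flush event,
	 * at 4 bytes per dword
	 */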
1672 total_size =
1673 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1674 total_size +=
1675 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1676 total_size +=
1677 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1678 total_size = ALIGN(total_size, 256);
1679 vgpr_offset = total_size;
1680 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1681 sgpr_offset = total_size;
1682 total_size += sizeof(sgpr_init_compute_shader);
1683
1684 /* allocate an indirect buffer to put the commands in */
1685 memset(&ib, 0, sizeof(ib));
1686 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1687 if (r) {
1688 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1689 return r;
1690 }
1691
1692 /* load the compute shaders */
1693 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1694 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1695
1696 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1697 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1698
1699 /* init the ib length to 0 */
1700 ib.length_dw = 0;
1701
1702 /* VGPR */
1703 /* write the register state for the compute dispatch */
1704 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1705 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1706 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1707 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1708 }
1709 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1710 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1711 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1712 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1713 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1714 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1715
1716 /* write dispatch packet */
1717 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1718 ib.ptr[ib.length_dw++] = 8; /* x */
1719 ib.ptr[ib.length_dw++] = 1; /* y */
1720 ib.ptr[ib.length_dw++] = 1; /* z */
1721 ib.ptr[ib.length_dw++] =
1722 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1723
1724 /* write CS partial flush packet */
1725 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1726 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1727
1728 /* SGPR1 */
1729 /* write the register state for the compute dispatch */
1730 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1731 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1732 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1733 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1734 }
1735 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1736 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1737 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1738 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1739 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1740 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1741
1742 /* write dispatch packet */
1743 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1744 ib.ptr[ib.length_dw++] = 8; /* x */
1745 ib.ptr[ib.length_dw++] = 1; /* y */
1746 ib.ptr[ib.length_dw++] = 1; /* z */
1747 ib.ptr[ib.length_dw++] =
1748 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1749
1750 /* write CS partial flush packet */
1751 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1752 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1753
1754 /* SGPR2 */
1755 /* write the register state for the compute dispatch */
1756 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1757 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1758 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1759 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1760 }
1761 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1762 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1763 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1764 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1765 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1766 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1767
1768 /* write dispatch packet */
1769 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1770 ib.ptr[ib.length_dw++] = 8; /* x */
1771 ib.ptr[ib.length_dw++] = 1; /* y */
1772 ib.ptr[ib.length_dw++] = 1; /* z */
1773 ib.ptr[ib.length_dw++] =
1774 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1775
1776 /* write CS partial flush packet */
1777 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1778 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1779
1780 	/* schedule the ib on the ring */
1781 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1782 if (r) {
1783 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1784 goto fail;
1785 }
1786
1787 /* wait for the GPU to finish processing the IB */
1788 	r = dma_fence_wait(f, false);
1789 if (r) {
1790 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1791 goto fail;
1792 }
1793
1794 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1795 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1796 WREG32(mmGB_EDC_MODE, tmp);
1797
1798 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1799 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1800 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1801
1802
1803 /* read back registers to clear the counters */
1804 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1805 RREG32(sec_ded_counter_registers[i]);
1806
1807fail:
1808 	amdgpu_ib_free(adev, &ib, NULL);
1809 	dma_fence_put(f);
1810
1811 return r;
1812}
1813
1814 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1815{
1816 u32 gb_addr_config;
1817 u32 mc_shared_chmap, mc_arb_ramcfg;
1818 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1819 u32 tmp;
1820 	int ret;
1821
1822 switch (adev->asic_type) {
1823 case CHIP_TOPAZ:
1824 adev->gfx.config.max_shader_engines = 1;
1825 adev->gfx.config.max_tile_pipes = 2;
1826 adev->gfx.config.max_cu_per_sh = 6;
1827 adev->gfx.config.max_sh_per_se = 1;
1828 adev->gfx.config.max_backends_per_se = 2;
1829 adev->gfx.config.max_texture_channel_caches = 2;
1830 adev->gfx.config.max_gprs = 256;
1831 adev->gfx.config.max_gs_threads = 32;
1832 adev->gfx.config.max_hw_contexts = 8;
1833
1834 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1835 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1836 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1837 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1838 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1839 break;
1840 case CHIP_FIJI:
1841 adev->gfx.config.max_shader_engines = 4;
1842 adev->gfx.config.max_tile_pipes = 16;
1843 adev->gfx.config.max_cu_per_sh = 16;
1844 adev->gfx.config.max_sh_per_se = 1;
1845 adev->gfx.config.max_backends_per_se = 4;
1846 		adev->gfx.config.max_texture_channel_caches = 16;
1847 adev->gfx.config.max_gprs = 256;
1848 adev->gfx.config.max_gs_threads = 32;
1849 adev->gfx.config.max_hw_contexts = 8;
1850
1851 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1852 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1853 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1854 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1855 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1856 break;
1857 	case CHIP_POLARIS11:
1858 	case CHIP_POLARIS12:
1859 ret = amdgpu_atombios_get_gfx_info(adev);
1860 if (ret)
1861 return ret;
1862 adev->gfx.config.max_gprs = 256;
1863 adev->gfx.config.max_gs_threads = 32;
1864 adev->gfx.config.max_hw_contexts = 8;
1865
1866 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1867 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1868 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1869 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1870 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1871 		break;
1872 	case CHIP_POLARIS10:
1873 ret = amdgpu_atombios_get_gfx_info(adev);
1874 if (ret)
1875 return ret;
1876 adev->gfx.config.max_gprs = 256;
1877 adev->gfx.config.max_gs_threads = 32;
1878 adev->gfx.config.max_hw_contexts = 8;
1879
1880 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1881 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1882 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1883 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1884 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1885 break;
1886 case CHIP_TONGA:
1887 adev->gfx.config.max_shader_engines = 4;
1888 adev->gfx.config.max_tile_pipes = 8;
1889 adev->gfx.config.max_cu_per_sh = 8;
1890 adev->gfx.config.max_sh_per_se = 1;
1891 adev->gfx.config.max_backends_per_se = 2;
1892 adev->gfx.config.max_texture_channel_caches = 8;
1893 adev->gfx.config.max_gprs = 256;
1894 adev->gfx.config.max_gs_threads = 32;
1895 adev->gfx.config.max_hw_contexts = 8;
1896
1897 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1898 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1899 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1900 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1901 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1902 break;
1903 case CHIP_CARRIZO:
1904 adev->gfx.config.max_shader_engines = 1;
1905 adev->gfx.config.max_tile_pipes = 2;
1906 adev->gfx.config.max_sh_per_se = 1;
1907 adev->gfx.config.max_backends_per_se = 2;
1908
1909 switch (adev->pdev->revision) {
1910 case 0xc4:
1911 case 0x84:
1912 case 0xc8:
1913 case 0xcc:
1914 case 0xe1:
1915 case 0xe3:
1916 /* B10 */
1917 adev->gfx.config.max_cu_per_sh = 8;
1918 break;
1919 case 0xc5:
1920 case 0x81:
1921 case 0x85:
1922 case 0xc9:
1923 case 0xcd:
1924 case 0xe2:
1925 case 0xe4:
1926 /* B8 */
1927 adev->gfx.config.max_cu_per_sh = 6;
1928 break;
1929 case 0xc6:
1930 case 0xca:
1931 case 0xce:
1932 		case 0x88:
1933 /* B6 */
1934 adev->gfx.config.max_cu_per_sh = 6;
1935 break;
1936 case 0xc7:
1937 case 0x87:
1938 case 0xcb:
1939 case 0xe5:
1940 case 0x89:
1941 default:
1942 /* B4 */
1943 adev->gfx.config.max_cu_per_sh = 4;
1944 break;
1945 }
1946
1947 adev->gfx.config.max_texture_channel_caches = 2;
1948 adev->gfx.config.max_gprs = 256;
1949 adev->gfx.config.max_gs_threads = 32;
1950 adev->gfx.config.max_hw_contexts = 8;
1951
1952 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1953 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1954 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1955 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1956 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1957 break;
1958 case CHIP_STONEY:
1959 adev->gfx.config.max_shader_engines = 1;
1960 adev->gfx.config.max_tile_pipes = 2;
1961 adev->gfx.config.max_sh_per_se = 1;
1962 adev->gfx.config.max_backends_per_se = 1;
1963
1964 switch (adev->pdev->revision) {
1965 case 0xc0:
1966 case 0xc1:
1967 case 0xc2:
1968 case 0xc4:
1969 case 0xc8:
1970 case 0xc9:
1971 adev->gfx.config.max_cu_per_sh = 3;
1972 break;
1973 case 0xd0:
1974 case 0xd1:
1975 case 0xd2:
1976 default:
1977 adev->gfx.config.max_cu_per_sh = 2;
1978 break;
1979 }
1980
1981 adev->gfx.config.max_texture_channel_caches = 2;
1982 adev->gfx.config.max_gprs = 256;
1983 adev->gfx.config.max_gs_threads = 16;
1984 adev->gfx.config.max_hw_contexts = 8;
1985
1986 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1987 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1988 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1989 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1990 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1991 break;
1992 default:
1993 adev->gfx.config.max_shader_engines = 2;
1994 adev->gfx.config.max_tile_pipes = 4;
1995 adev->gfx.config.max_cu_per_sh = 2;
1996 adev->gfx.config.max_sh_per_se = 1;
1997 adev->gfx.config.max_backends_per_se = 2;
1998 adev->gfx.config.max_texture_channel_caches = 4;
1999 adev->gfx.config.max_gprs = 256;
2000 adev->gfx.config.max_gs_threads = 32;
2001 adev->gfx.config.max_hw_contexts = 8;
2002
2003 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2004 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2005 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2006 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2007 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
2008 break;
2009 }
2010
2011 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2012 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2013 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2014
2015 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2016 adev->gfx.config.mem_max_burst_length_bytes = 256;
2017 if (adev->flags & AMD_IS_APU) {
2018 /* Get memory bank mapping mode. */
2019 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2020 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2021 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2022
2023 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2024 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2025 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2026
2027 /* Validate settings in case only one DIMM installed. */
2028 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2029 dimm00_addr_map = 0;
2030 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2031 dimm01_addr_map = 0;
2032 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2033 dimm10_addr_map = 0;
2034 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2035 dimm11_addr_map = 0;
2036
2037 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
2038 		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
2039 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2040 adev->gfx.config.mem_row_size_in_kb = 2;
2041 else
2042 adev->gfx.config.mem_row_size_in_kb = 1;
2043 } else {
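		/* discrete boards: row size = 4 bytes per column times
		 * 2^(8 + NOOFCOLS) columns, converted to KB and clamped to 4
		 */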
2044 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2045 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2046 if (adev->gfx.config.mem_row_size_in_kb > 4)
2047 adev->gfx.config.mem_row_size_in_kb = 4;
2048 }
2049
2050 adev->gfx.config.shader_engine_tile_size = 32;
2051 adev->gfx.config.num_gpus = 1;
2052 adev->gfx.config.multi_gpu_tile_size = 64;
2053
2054 /* fix up row size */
2055 switch (adev->gfx.config.mem_row_size_in_kb) {
2056 case 1:
2057 default:
2058 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2059 break;
2060 case 2:
2061 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2062 break;
2063 case 4:
2064 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2065 break;
2066 }
2067 adev->gfx.config.gb_addr_config = gb_addr_config;
2068
2069 return 0;
2070}
2071
2072 static int gfx_v8_0_sw_init(void *handle)
2073{
2074 int i, r;
2075 struct amdgpu_ring *ring;
2076 	struct amdgpu_kiq *kiq;
2077 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2078
2079 	/* KIQ event */
2080 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
2081 if (r)
2082 return r;
2083
2084 	/* EOP Event */
2085 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
2086 if (r)
2087 return r;
2088
2089 /* Privileged reg */
2090 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
2091 &adev->gfx.priv_reg_irq);
2092 if (r)
2093 return r;
2094
2095 /* Privileged inst */
2096 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
2097 &adev->gfx.priv_inst_irq);
2098 if (r)
2099 return r;
2100
2101 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2102
2103 gfx_v8_0_scratch_init(adev);
2104
2105 r = gfx_v8_0_init_microcode(adev);
2106 if (r) {
2107 DRM_ERROR("Failed to load gfx firmware!\n");
2108 return r;
2109 }
2110
2111 r = gfx_v8_0_rlc_init(adev);
2112 if (r) {
2113 DRM_ERROR("Failed to init rlc BOs!\n");
2114 return r;
2115 }
2116
2117 r = gfx_v8_0_mec_init(adev);
2118 if (r) {
2119 DRM_ERROR("Failed to init MEC BOs!\n");
2120 return r;
2121 }
2122
2123 /* set up the gfx ring */
2124 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2125 ring = &adev->gfx.gfx_ring[i];
2126 ring->ring_obj = NULL;
2127 sprintf(ring->name, "gfx");
2128 /* no gfx doorbells on iceland */
2129 if (adev->asic_type != CHIP_TOPAZ) {
2130 ring->use_doorbell = true;
2131 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2132 }
2133
2134 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2135 AMDGPU_CP_IRQ_GFX_EOP);
2136 if (r)
2137 return r;
2138 }
2139
2140 /* set up the compute queues */
2141 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2142 unsigned irq_type;
2143
2144 /* max 32 queues per MEC */
2145 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2146 DRM_ERROR("Too many (%d) compute rings!\n", i);
2147 break;
2148 }
2149 ring = &adev->gfx.compute_ring[i];
2150 ring->ring_obj = NULL;
2151 ring->use_doorbell = true;
2152 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2153 ring->me = 1; /* first MEC */
2154 ring->pipe = i / 8;
2155 ring->queue = i % 8;
2156 		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2157 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2158 /* type-2 packets are deprecated on MEC, use type-3 instead */
79887142
CK
2159 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2160 irq_type);
aaa36a97
AD
2161 if (r)
2162 return r;
2163 }
2164
2165 if (amdgpu_sriov_vf(adev)) {
2166 r = gfx_v8_0_kiq_init(adev);
2167 if (r) {
2168 DRM_ERROR("Failed to init KIQ BOs!\n");
2169 return r;
2170 }
2171
2172 kiq = &adev->gfx.kiq;
2173 r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2174 if (r)
2175 return r;
2176
2177 		/* create MQD for all compute queues as well as KIQ for SRIOV case */
2178 		r = gfx_v8_0_compute_mqd_sw_init(adev);
2179 if (r)
2180 return r;
2181 }
2182
2183 	/* reserve GDS, GWS and OA resource for gfx */
2184 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2185 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2186 &adev->gds.gds_gfx_bo, NULL, NULL);
2187 if (r)
2188 return r;
2189
2190 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2191 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2192 &adev->gds.gws_gfx_bo, NULL, NULL);
2193 if (r)
2194 return r;
2195
2196 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2197 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2198 &adev->gds.oa_gfx_bo, NULL, NULL);
2199 if (r)
2200 return r;
2201
2202 adev->gfx.ce_ram_size = 0x8000;
2203
2204 r = gfx_v8_0_gpu_early_init(adev);
2205 if (r)
2206 return r;
2207
2208 return 0;
2209}
2210
2211 static int gfx_v8_0_sw_fini(void *handle)
2212{
2213 int i;
2214 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2215
2216 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2217 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2218 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2219
2220 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2221 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2222 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2223 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2224
2225 	if (amdgpu_sriov_vf(adev)) {
2226 		gfx_v8_0_compute_mqd_sw_fini(adev);
2227 gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2228 gfx_v8_0_kiq_fini(adev);
2229 }
2230
2231 	gfx_v8_0_mec_fini(adev);
2232 	gfx_v8_0_rlc_fini(adev);
2233 	gfx_v8_0_free_microcode(adev);
2234
2235 return 0;
2236}
2237
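/* fill the GB_TILE_MODE and GB_MACROTILE_MODE register tables with the
 * per-ASIC surface tiling parameters; table indices that are skipped in
 * the write loops below are left unprogrammed
 */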
2238static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2239{
2240 	uint32_t *modearray, *mod2array;
2241 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2242 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2243 	u32 reg_offset;
2244
2245 modearray = adev->gfx.config.tile_mode_array;
2246 mod2array = adev->gfx.config.macrotile_mode_array;
2247
2248 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2249 modearray[reg_offset] = 0;
2250
2251 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2252 mod2array[reg_offset] = 0;
2253
2254 switch (adev->asic_type) {
2255 case CHIP_TOPAZ:
2256 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2257 PIPE_CONFIG(ADDR_SURF_P2) |
2258 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2259 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2260 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2261 PIPE_CONFIG(ADDR_SURF_P2) |
2262 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2263 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2264 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2265 PIPE_CONFIG(ADDR_SURF_P2) |
2266 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2267 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2268 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2269 PIPE_CONFIG(ADDR_SURF_P2) |
2270 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2271 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2272 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273 PIPE_CONFIG(ADDR_SURF_P2) |
2274 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2275 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2276 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2277 PIPE_CONFIG(ADDR_SURF_P2) |
2278 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2279 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2280 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2281 PIPE_CONFIG(ADDR_SURF_P2) |
2282 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2283 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2284 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2285 PIPE_CONFIG(ADDR_SURF_P2));
2286 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2287 PIPE_CONFIG(ADDR_SURF_P2) |
2288 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2290 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291 PIPE_CONFIG(ADDR_SURF_P2) |
2292 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2294 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2295 PIPE_CONFIG(ADDR_SURF_P2) |
2296 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2298 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2299 PIPE_CONFIG(ADDR_SURF_P2) |
2300 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2302 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303 PIPE_CONFIG(ADDR_SURF_P2) |
2304 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2306 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2307 PIPE_CONFIG(ADDR_SURF_P2) |
2308 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2310 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2311 PIPE_CONFIG(ADDR_SURF_P2) |
2312 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2314 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2315 PIPE_CONFIG(ADDR_SURF_P2) |
2316 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2319 PIPE_CONFIG(ADDR_SURF_P2) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2322 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2323 PIPE_CONFIG(ADDR_SURF_P2) |
2324 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2326 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2327 PIPE_CONFIG(ADDR_SURF_P2) |
2328 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2330 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2331 PIPE_CONFIG(ADDR_SURF_P2) |
2332 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2333 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2334 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2335 PIPE_CONFIG(ADDR_SURF_P2) |
2336 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2337 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2338 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2339 PIPE_CONFIG(ADDR_SURF_P2) |
2340 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2342 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2343 PIPE_CONFIG(ADDR_SURF_P2) |
2344 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2345 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2346 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2347 PIPE_CONFIG(ADDR_SURF_P2) |
2348 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2349 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2350 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2351 PIPE_CONFIG(ADDR_SURF_P2) |
2352 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2353 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2354 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2355 PIPE_CONFIG(ADDR_SURF_P2) |
2356 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2357 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2358
2359 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2362 NUM_BANKS(ADDR_SURF_8_BANK));
2363 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2366 NUM_BANKS(ADDR_SURF_8_BANK));
2367 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2370 NUM_BANKS(ADDR_SURF_8_BANK));
2371 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2374 NUM_BANKS(ADDR_SURF_8_BANK));
2375 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2378 NUM_BANKS(ADDR_SURF_8_BANK));
2379 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2382 NUM_BANKS(ADDR_SURF_8_BANK));
2383 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2386 NUM_BANKS(ADDR_SURF_8_BANK));
2387 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2390 NUM_BANKS(ADDR_SURF_16_BANK));
2391 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394 NUM_BANKS(ADDR_SURF_16_BANK));
2395 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2396 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2397 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2398 NUM_BANKS(ADDR_SURF_16_BANK));
2399 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2400 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2401 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2402 NUM_BANKS(ADDR_SURF_16_BANK));
2403 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2406 NUM_BANKS(ADDR_SURF_16_BANK));
2407 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2409 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2410 NUM_BANKS(ADDR_SURF_16_BANK));
2411 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2412 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2413 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2414 NUM_BANKS(ADDR_SURF_8_BANK));
2415
2416 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2417 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2418 reg_offset != 23)
2419 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2420
2421 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2422 if (reg_offset != 7)
2423 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2424
2425 		break;
2426 	case CHIP_FIJI:
2427 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2428 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2430 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2431 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2432 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2434 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2435 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2436 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2437 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2438 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2439 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2441 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2442 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2443 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2446 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2447 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2448 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2449 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2450 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2451 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2452 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2453 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2454 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2455 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2456 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2457 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2458 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2459 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2460 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2461 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2462 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2463 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2464 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2465 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2466 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2467 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2468 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2469 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2470 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2471 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2472 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2473 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2474 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2475 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2476 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2477 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2478 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2480 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2481 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2482 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2483 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2484 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2485 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2486 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2487 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2488 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2489 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2490 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2491 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2492 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2493 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2494 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2495 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2496 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2497 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2498 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2499 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2500 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2501 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2502 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2503 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2504 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2505 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2506 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2507 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2508 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2509 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2510 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2511 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2512 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2513 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2514 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2515 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2516 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2517 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2518 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2519 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2520 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2521 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2522 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2523 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2524 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2525 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2526 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2527 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2528 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2529 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2530 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2531 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2532 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2533 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2535 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2536 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2537 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2539 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2540 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2541 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2543 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2544 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2545 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2546 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2547 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2548 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2549
2550 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2552 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2553 NUM_BANKS(ADDR_SURF_8_BANK));
2554 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2556 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2557 NUM_BANKS(ADDR_SURF_8_BANK));
2558 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2560 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2561 NUM_BANKS(ADDR_SURF_8_BANK));
2562 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2563 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2564 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2565 NUM_BANKS(ADDR_SURF_8_BANK));
2566 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2568 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2569 NUM_BANKS(ADDR_SURF_8_BANK));
2570 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2572 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2573 NUM_BANKS(ADDR_SURF_8_BANK));
2574 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2576 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2577 NUM_BANKS(ADDR_SURF_8_BANK));
2578 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2580 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2581 NUM_BANKS(ADDR_SURF_8_BANK));
2582 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2583 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2584 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2585 NUM_BANKS(ADDR_SURF_8_BANK));
2586 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2588 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2589 NUM_BANKS(ADDR_SURF_8_BANK));
2590 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2591 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2592 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2593 NUM_BANKS(ADDR_SURF_8_BANK));
2594 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2595 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2596 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2597 NUM_BANKS(ADDR_SURF_8_BANK));
2598 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2600 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2601 NUM_BANKS(ADDR_SURF_8_BANK));
2602 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2603 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2604 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2605 NUM_BANKS(ADDR_SURF_4_BANK));
2606
2607 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2608 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2609
2610 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2611 if (reg_offset != 7)
2612 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2613
2614 		break;
2615 	case CHIP_TONGA:
2616 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2617 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2619 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2620 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2622 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2623 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2624 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2626 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2627 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2628 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2630 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2631 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2632 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2633 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2634 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2635 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2636 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2638 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2639 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2640 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2641 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2642 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2644 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2645 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2646 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2647 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2648 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2649 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2650 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2651 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2652 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2653 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2654 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2655 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2657 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2658 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2659 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2660 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2661 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2662 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2663 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2665 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2666 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2667 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2669 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2670 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2671 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2672 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2673 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2674 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2675 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2676 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2677 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2679 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2680 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2681 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2682 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2683 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2684 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2685 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2686 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2687 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2688 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2689 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2690 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2691 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2692 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2693 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2694 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2695 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2697 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2698 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2699 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2700 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2701 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2702 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2703 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2704 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2705 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2706 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2707 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2709 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2710 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2711 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2712 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2713 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2714 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2715 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2716 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2717 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2718 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2719 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2720 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2721 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2722 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2723 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2724 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2727 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2728 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2730 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2731 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2732 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2733 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2734 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2735 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2736 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2737 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2738
2739 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2741 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2742 NUM_BANKS(ADDR_SURF_16_BANK));
2743 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2745 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746 NUM_BANKS(ADDR_SURF_16_BANK));
2747 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2749 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2750 NUM_BANKS(ADDR_SURF_16_BANK));
2751 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2753 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2754 NUM_BANKS(ADDR_SURF_16_BANK));
2755 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2757 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2758 NUM_BANKS(ADDR_SURF_16_BANK));
2759 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2761 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2762 NUM_BANKS(ADDR_SURF_16_BANK));
2763 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2765 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2766 NUM_BANKS(ADDR_SURF_16_BANK));
2767 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2769 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2770 NUM_BANKS(ADDR_SURF_16_BANK));
2771 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2773 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2774 NUM_BANKS(ADDR_SURF_16_BANK));
2775 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2776 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2777 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2778 NUM_BANKS(ADDR_SURF_16_BANK));
2779 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2782 NUM_BANKS(ADDR_SURF_16_BANK));
2783 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2785 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2786 NUM_BANKS(ADDR_SURF_8_BANK));
2787 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2788 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2789 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2790 NUM_BANKS(ADDR_SURF_4_BANK));
2791 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2792 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2793 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2794 NUM_BANKS(ADDR_SURF_4_BANK));
2795
2796 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2797 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2798
2799 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2800 if (reg_offset != 7)
2801 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2802
2803 		break;
2804 	case CHIP_POLARIS11:
2805 	case CHIP_POLARIS12:
2806 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2807 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2809 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2810 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2811 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2812 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2813 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2814 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2815 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2816 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2817 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2818 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2819 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2820 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2821 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2822 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2823 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2825 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2826 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2827 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2828 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2829 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2830 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2831 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2832 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2833 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2834 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2835 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2837 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2838 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2839 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2840 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2841 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2843 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2844 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2845 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2846 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2847 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2848 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2849 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2850 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2851 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2852 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2853 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2854 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2855 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2856 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2857 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2858 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2859 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2860 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2861 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2862 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2863 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2864 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2865 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2866 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2867 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2868 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2869 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2870 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2871 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2872 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2873 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2875 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2876 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2877 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2878 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2879 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2880 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2881 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2882 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2883 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2884 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2885 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2886 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2887 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2888 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2889 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2890 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2891 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2892 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2893 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2894 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2895 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2896 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2897 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2898 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2899 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2900 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2901 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2902 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2903 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2904 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2905 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2906 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2907 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2908 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2909 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2910 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2911 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2912 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2913 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2914 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2915 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2918 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2919 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2920 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2921 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2922 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2923 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2924 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2925 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2927 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2928
2929 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2930 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2931 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2932 NUM_BANKS(ADDR_SURF_16_BANK));
2933
2934 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2935 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2936 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2937 NUM_BANKS(ADDR_SURF_16_BANK));
2938
2939 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2940 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2941 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2942 NUM_BANKS(ADDR_SURF_16_BANK));
2943
2944 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2945 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2946 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2947 NUM_BANKS(ADDR_SURF_16_BANK));
2948
2949 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2950 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2951 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2952 NUM_BANKS(ADDR_SURF_16_BANK));
2953
2954 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2955 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2956 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2957 NUM_BANKS(ADDR_SURF_16_BANK));
2958
2959 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2960 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2961 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2962 NUM_BANKS(ADDR_SURF_16_BANK));
2963
2964 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2965 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2966 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967 NUM_BANKS(ADDR_SURF_16_BANK));
2968
2969 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2970 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2971 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2972 NUM_BANKS(ADDR_SURF_16_BANK));
2973
2974 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2976 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2977 NUM_BANKS(ADDR_SURF_16_BANK));
2978
2979 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2980 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2981 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982 NUM_BANKS(ADDR_SURF_16_BANK));
2983
2984 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2985 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2986 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2987 NUM_BANKS(ADDR_SURF_16_BANK));
2988
2989 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2990 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2991 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2992 NUM_BANKS(ADDR_SURF_8_BANK));
2993
2994 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2995 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2996 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2997 NUM_BANKS(ADDR_SURF_4_BANK));
2998
2999 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3000 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3001
3002 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3003 if (reg_offset != 7)
3004 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3005
3006 break;
3007	case CHIP_POLARIS10:
3008 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3009 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3010 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3011 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3012 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3013 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3014 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3015 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3016 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3017 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3018 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3019 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3020 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3021 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3022 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3023 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3024 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3025 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3026 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3027 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3028 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3029 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3030 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3031 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3032 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3033 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3034 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3035 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3036 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3037 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3038 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3039 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3040 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3041 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3042 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3043 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3044 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3045 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3046 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3047 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3048 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3049 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3050 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3051 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3052 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3053 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3054 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3055 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3056 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3057 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3058 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3059 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3060 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3061 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3062 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3063 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3064 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3065 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3066 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3067 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3068 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3069 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3070 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3071 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3072 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3073 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3074 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3075 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3076 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3077 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3078 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3079 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3080 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3081 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3082 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3083 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3084 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3085 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3086 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3087 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3088 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3089 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3090 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3091 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3092 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3093 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3094 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3095 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3096 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3097 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3098 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3099 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3100 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3101 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3102 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3103 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3104 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3105 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3106 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3107 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3108 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3109 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3110 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3111 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3112 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3113 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3114 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3115 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3116 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3117 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3118 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3119 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3120 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3121 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3122 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3123 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3124 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3125 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3126 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3127 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3128 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3129 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3130
3131 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3132 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3133 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3134 NUM_BANKS(ADDR_SURF_16_BANK));
3135
3136 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3137 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3138 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3139 NUM_BANKS(ADDR_SURF_16_BANK));
3140
3141 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3142 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3143 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3144 NUM_BANKS(ADDR_SURF_16_BANK));
3145
3146 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3147 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3148 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3149 NUM_BANKS(ADDR_SURF_16_BANK));
3150
3151 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3152 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3153 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3154 NUM_BANKS(ADDR_SURF_16_BANK));
3155
3156 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3157 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3158 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3159 NUM_BANKS(ADDR_SURF_16_BANK));
3160
3161 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3162 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3163 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3164 NUM_BANKS(ADDR_SURF_16_BANK));
3165
3166 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3167 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3168 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3169 NUM_BANKS(ADDR_SURF_16_BANK));
3170
3171 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3172 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3173 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3174 NUM_BANKS(ADDR_SURF_16_BANK));
3175
3176 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3177 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3178 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3179 NUM_BANKS(ADDR_SURF_16_BANK));
3180
3181 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3182 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3183 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3184 NUM_BANKS(ADDR_SURF_16_BANK));
3185
3186 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3187 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3188 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3189 NUM_BANKS(ADDR_SURF_8_BANK));
3190
3191 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3192 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3193 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3194 NUM_BANKS(ADDR_SURF_4_BANK));
3195
3196 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3198 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3199 NUM_BANKS(ADDR_SURF_4_BANK));
3200
3201 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3202 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3203
3204 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3205 if (reg_offset != 7)
3206 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3207
3208	break;
3209	case CHIP_STONEY:
3210 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3211 PIPE_CONFIG(ADDR_SURF_P2) |
3212 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3213 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3214 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3215 PIPE_CONFIG(ADDR_SURF_P2) |
3216 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3217 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3218 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3219 PIPE_CONFIG(ADDR_SURF_P2) |
3220 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3221 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3222 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3223 PIPE_CONFIG(ADDR_SURF_P2) |
3224 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3225 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3226 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3227 PIPE_CONFIG(ADDR_SURF_P2) |
3228 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3230 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3231 PIPE_CONFIG(ADDR_SURF_P2) |
3232 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3233 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3234 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3235 PIPE_CONFIG(ADDR_SURF_P2) |
3236 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3237 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3238 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3239 PIPE_CONFIG(ADDR_SURF_P2));
3240 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3241 PIPE_CONFIG(ADDR_SURF_P2) |
3242 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3243 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3244 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3245 PIPE_CONFIG(ADDR_SURF_P2) |
3246 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3247 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3248 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3249 PIPE_CONFIG(ADDR_SURF_P2) |
3250 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3251 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3252 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3253 PIPE_CONFIG(ADDR_SURF_P2) |
3254 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3255 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3256 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3257 PIPE_CONFIG(ADDR_SURF_P2) |
3258 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3260 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3261 PIPE_CONFIG(ADDR_SURF_P2) |
3262 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3263 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3264 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3265 PIPE_CONFIG(ADDR_SURF_P2) |
3266 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3267 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3268 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3269 PIPE_CONFIG(ADDR_SURF_P2) |
3270 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3271 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3272 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3273 PIPE_CONFIG(ADDR_SURF_P2) |
3274 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3276 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3277 PIPE_CONFIG(ADDR_SURF_P2) |
3278 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3279 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3280 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3281 PIPE_CONFIG(ADDR_SURF_P2) |
3282 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3284 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3285 PIPE_CONFIG(ADDR_SURF_P2) |
3286 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3287 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3288 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3289 PIPE_CONFIG(ADDR_SURF_P2) |
3290 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3291 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3292 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3293 PIPE_CONFIG(ADDR_SURF_P2) |
3294 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3295 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3296 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3297 PIPE_CONFIG(ADDR_SURF_P2) |
3298 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3299 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3300 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3301 PIPE_CONFIG(ADDR_SURF_P2) |
3302 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3303 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3304 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3305 PIPE_CONFIG(ADDR_SURF_P2) |
3306 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3307 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3308 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3309 PIPE_CONFIG(ADDR_SURF_P2) |
3310 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3311 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3312
3313 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3314 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3315 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3316 NUM_BANKS(ADDR_SURF_8_BANK));
3317 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3318 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3319 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3320 NUM_BANKS(ADDR_SURF_8_BANK));
3321 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3322 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3323 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3324 NUM_BANKS(ADDR_SURF_8_BANK));
3325 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3326 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3327 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3328 NUM_BANKS(ADDR_SURF_8_BANK));
3329 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3330 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3331 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3332 NUM_BANKS(ADDR_SURF_8_BANK));
3333 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3334 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3335 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3336 NUM_BANKS(ADDR_SURF_8_BANK));
3337 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3338 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3339 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3340 NUM_BANKS(ADDR_SURF_8_BANK));
3341 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3342 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3343 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3344 NUM_BANKS(ADDR_SURF_16_BANK));
3345 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3346 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3348 NUM_BANKS(ADDR_SURF_16_BANK));
3349 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3352 NUM_BANKS(ADDR_SURF_16_BANK));
3353 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3354 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3355 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3356 NUM_BANKS(ADDR_SURF_16_BANK));
3357 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3360 NUM_BANKS(ADDR_SURF_16_BANK));
3361 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3362 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3363 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3364 NUM_BANKS(ADDR_SURF_16_BANK));
3365 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3366 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3367 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3368 NUM_BANKS(ADDR_SURF_8_BANK));
3369
3370 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3371 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3372 reg_offset != 23)
3373 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3374
3375 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3376 if (reg_offset != 7)
3377 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3378
3379	break;
3380	default:
3381 dev_warn(adev->dev,
3382 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3383 adev->asic_type);
3384
3385 case CHIP_CARRIZO:
3386 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3387 PIPE_CONFIG(ADDR_SURF_P2) |
3388 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3389 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3390 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3391 PIPE_CONFIG(ADDR_SURF_P2) |
3392 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3393 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3394 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3395 PIPE_CONFIG(ADDR_SURF_P2) |
3396 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3397 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3398 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3399 PIPE_CONFIG(ADDR_SURF_P2) |
3400 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3401 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3402 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3403 PIPE_CONFIG(ADDR_SURF_P2) |
3404 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3405 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3406 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3407 PIPE_CONFIG(ADDR_SURF_P2) |
3408 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3409 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3410 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3411 PIPE_CONFIG(ADDR_SURF_P2) |
3412 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3413 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3414 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3415 PIPE_CONFIG(ADDR_SURF_P2));
3416 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3417 PIPE_CONFIG(ADDR_SURF_P2) |
3418 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3419 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3420 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3421 PIPE_CONFIG(ADDR_SURF_P2) |
3422 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3423 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3424 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3425 PIPE_CONFIG(ADDR_SURF_P2) |
3426 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3427 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3428 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3429 PIPE_CONFIG(ADDR_SURF_P2) |
3430 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3431 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3432 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3433 PIPE_CONFIG(ADDR_SURF_P2) |
3434 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3435 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3436 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3437 PIPE_CONFIG(ADDR_SURF_P2) |
3438 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3439 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3440 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3441 PIPE_CONFIG(ADDR_SURF_P2) |
3442 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3443 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3444 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3445 PIPE_CONFIG(ADDR_SURF_P2) |
3446 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3447 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3448 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3449 PIPE_CONFIG(ADDR_SURF_P2) |
3450 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3451 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3452 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3453 PIPE_CONFIG(ADDR_SURF_P2) |
3454 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3455 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3456 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3457 PIPE_CONFIG(ADDR_SURF_P2) |
3458 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3459 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3460 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3461 PIPE_CONFIG(ADDR_SURF_P2) |
3462 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3463 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3464 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3465 PIPE_CONFIG(ADDR_SURF_P2) |
3466 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3468 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3469 PIPE_CONFIG(ADDR_SURF_P2) |
3470 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3472 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3473 PIPE_CONFIG(ADDR_SURF_P2) |
3474 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3476 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3477 PIPE_CONFIG(ADDR_SURF_P2) |
3478 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3480 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3481 PIPE_CONFIG(ADDR_SURF_P2) |
3482 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3484 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3485 PIPE_CONFIG(ADDR_SURF_P2) |
3486 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3488
3489 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3490 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3491 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3492 NUM_BANKS(ADDR_SURF_8_BANK));
3493 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3494 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3495 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3496 NUM_BANKS(ADDR_SURF_8_BANK));
3497 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3498 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3499 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3500 NUM_BANKS(ADDR_SURF_8_BANK));
3501 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3502 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3503 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3504 NUM_BANKS(ADDR_SURF_8_BANK));
3505 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3506 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3507 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3508 NUM_BANKS(ADDR_SURF_8_BANK));
3509 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3510 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3511 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3512 NUM_BANKS(ADDR_SURF_8_BANK));
3513 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3514 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3515 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3516 NUM_BANKS(ADDR_SURF_8_BANK));
3517 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3518 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3519 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3520 NUM_BANKS(ADDR_SURF_16_BANK));
3521 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3522 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3523 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3524 NUM_BANKS(ADDR_SURF_16_BANK));
3525 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3526 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3527 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3528 NUM_BANKS(ADDR_SURF_16_BANK));
3529 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3530 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3531 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3532 NUM_BANKS(ADDR_SURF_16_BANK));
3533 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3534 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3535 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3536 NUM_BANKS(ADDR_SURF_16_BANK));
3537 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3538 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3539 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3540 NUM_BANKS(ADDR_SURF_16_BANK));
3541 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3542 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3543 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3544 NUM_BANKS(ADDR_SURF_8_BANK));
3545
3546 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3547 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3548 reg_offset != 23)
3549 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3550
3551 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3552 if (reg_offset != 7)
3553 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3554
3555 break;
3556 }
3557}
3558
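/*
 * Each modearray[] entry above is a complete GB_TILE_MODEn register value
 * built by OR-ing the field macros (ARRAY_MODE, PIPE_CONFIG, TILE_SPLIT,
 * MICRO_TILE_MODE_NEW, SAMPLE_SPLIT), and each mod2array[] entry a
 * GB_MACROTILE_MODEn value (BANK_WIDTH, BANK_HEIGHT, MACRO_TILE_ASPECT,
 * NUM_BANKS). The write loops skip macrotile index 7 on every chip, and
 * additionally tile indices 7/12/17/23 on the two-pipe APUs
 * (Carrizo/Stoney); those entries are simply left unprogrammed.
 */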
3559	static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3560	u32 se_num, u32 sh_num, u32 instance)
3561	{
3562 u32 data;
3563
3564 if (instance == 0xffffffff)
3565 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3566 else
3567 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3568
3569	if (se_num == 0xffffffff)
3570	data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3571	else
3572	data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3573
3574 if (sh_num == 0xffffffff)
3575 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3576 else
3577	data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3578
3579 WREG32(mmGRBM_GFX_INDEX, data);
3580}
3581
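/*
 * Example: gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff)
 * sets all three *_BROADCAST_WRITES fields of GRBM_GFX_INDEX so subsequent
 * banked register accesses reach every SE/SH/instance; passing concrete
 * indices instead steers the window at a single unit, as the per-SE/SH
 * loops below do before reading the RB bitmap and SERDES status.
 */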
3582static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3583{
3584 return (u32)((1ULL << bit_width) - 1);
3585}
3586
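/*
 * Example: gfx_v8_0_create_bitmask(4) returns 0xf. The 1ULL intermediate
 * keeps the shift well-defined even for bit_width == 32.
 */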
3587static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3588{
3589 u32 data, mask;
3590
3591 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3592 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3593
3594	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3595
3596	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3597	adev->gfx.config.max_sh_per_se);
3598
3599	return (~data) & mask;
3600}
3601
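/*
 * The two backend-disable registers are OR-ed because an RB is unusable if
 * either the fuses (CC_RB_BACKEND_DISABLE) or the driver/user setting
 * (GC_USER_RB_BACKEND_DISABLE) turned it off; complementing and masking to
 * the per-SH width yields the active-RB bitmap. Example: with 4 RBs per SH
 * and a disable field of 0x1, the function returns 0xe.
 */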
3602static void
3603gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3604{
3605 switch (adev->asic_type) {
3606 case CHIP_FIJI:
3607 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3608 RB_XSEL2(1) | PKR_MAP(2) |
3609 PKR_XSEL(1) | PKR_YSEL(1) |
3610 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3611 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3612 SE_PAIR_YSEL(2);
3613 break;
3614 case CHIP_TONGA:
3615 case CHIP_POLARIS10:
3616 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3617 SE_XSEL(1) | SE_YSEL(1);
3618 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3619 SE_PAIR_YSEL(2);
3620 break;
3621 case CHIP_TOPAZ:
3622 case CHIP_CARRIZO:
3623 *rconf |= RB_MAP_PKR0(2);
3624 *rconf1 |= 0x0;
3625 break;
3626 case CHIP_POLARIS11:
3627	case CHIP_POLARIS12:
3628 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3629 SE_XSEL(1) | SE_YSEL(1);
3630 *rconf1 |= 0x0;
3631 break;
3632 case CHIP_STONEY:
3633 *rconf |= 0x0;
3634 *rconf1 |= 0x0;
3635 break;
3636 default:
3637 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3638 break;
3639 }
3640}
3641
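/*
 * These per-ASIC constants are the raster configuration for a fully
 * enabled part, mapping render backends to packers (PKR_*) and shader
 * engines (SE_*); gfx_v8_0_write_harvested_raster_configs() below rewrites
 * the mapping when some RBs are fused off.
 */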
3642static void
3643gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3644 u32 raster_config, u32 raster_config_1,
3645 unsigned rb_mask, unsigned num_rb)
3646{
3647 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3648 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3649 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3650 unsigned rb_per_se = num_rb / num_se;
3651 unsigned se_mask[4];
3652 unsigned se;
3653
3654 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3655 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3656 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3657 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3658
3659 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3660 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3661 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3662
3663 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3664 (!se_mask[2] && !se_mask[3]))) {
3665 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3666
3667 if (!se_mask[0] && !se_mask[1]) {
3668 raster_config_1 |=
3669 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3670 } else {
3671 raster_config_1 |=
3672 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3673 }
3674 }
3675
3676 for (se = 0; se < num_se; se++) {
3677 unsigned raster_config_se = raster_config;
3678 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3679 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3680 int idx = (se / 2) * 2;
3681
3682 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3683 raster_config_se &= ~SE_MAP_MASK;
3684
3685 if (!se_mask[idx]) {
3686 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3687 } else {
3688 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3689 }
3690 }
3691
3692 pkr0_mask &= rb_mask;
3693 pkr1_mask &= rb_mask;
3694 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3695 raster_config_se &= ~PKR_MAP_MASK;
3696
3697 if (!pkr0_mask) {
3698 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3699 } else {
3700 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3701 }
3702 }
3703
3704 if (rb_per_se >= 2) {
3705 unsigned rb0_mask = 1 << (se * rb_per_se);
3706 unsigned rb1_mask = rb0_mask << 1;
3707
3708 rb0_mask &= rb_mask;
3709 rb1_mask &= rb_mask;
3710 if (!rb0_mask || !rb1_mask) {
3711 raster_config_se &= ~RB_MAP_PKR0_MASK;
3712
3713 if (!rb0_mask) {
3714 raster_config_se |=
3715 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3716 } else {
3717 raster_config_se |=
3718 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3719 }
3720 }
3721
3722 if (rb_per_se > 2) {
3723 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3724 rb1_mask = rb0_mask << 1;
3725 rb0_mask &= rb_mask;
3726 rb1_mask &= rb_mask;
3727 if (!rb0_mask || !rb1_mask) {
3728 raster_config_se &= ~RB_MAP_PKR1_MASK;
3729
3730 if (!rb0_mask) {
3731 raster_config_se |=
3732 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3733 } else {
3734 raster_config_se |=
3735 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3736 }
3737 }
3738 }
3739 }
3740
3741 /* GRBM_GFX_INDEX has a different offset on VI */
3742 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3743 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3744 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3745 }
3746
3747 /* GRBM_GFX_INDEX has a different offset on VI */
3748 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3749}
3750
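/*
 * Worked example, assuming num_rb = 8 and num_se = 2 (so rb_per_se = 4):
 * se_mask[0] = rb_mask & 0x0f and se_mask[1] = (se_mask[0] << 4) & rb_mask.
 * If one engine has every RB harvested, its SE_MAP field is repointed at
 * the surviving engine, and the same repair cascades down through PKR_MAP
 * and the RB_MAP_PKR0/RB_MAP_PKR1 fields for partially harvested packers.
 */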
3751	static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3752{
3753 int i, j;
3754	u32 data;
3755	u32 raster_config = 0, raster_config_1 = 0;
3756	u32 active_rbs = 0;
3757	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3758	adev->gfx.config.max_sh_per_se;
3759	unsigned num_rb_pipes;
3760
3761 mutex_lock(&adev->grbm_idx_mutex);
3762 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3763 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3764	gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3765 data = gfx_v8_0_get_rb_active_bitmap(adev);
3766 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3767	rb_bitmap_width_per_sh);
3768	}
3769	}
3770	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3771
3772	adev->gfx.config.backend_enable_mask = active_rbs;
3773	adev->gfx.config.num_rbs = hweight32(active_rbs);
3774
3775 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3776 adev->gfx.config.max_shader_engines, 16);
3777
3778 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3779
3780 if (!adev->gfx.config.backend_enable_mask ||
3781 adev->gfx.config.num_rbs >= num_rb_pipes) {
3782 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3783 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3784 } else {
3785 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3786 adev->gfx.config.backend_enable_mask,
3787 num_rb_pipes);
3788 }
3789
3790 /* cache the values for userspace */
3791 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3792 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3793 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3794 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3795 RREG32(mmCC_RB_BACKEND_DISABLE);
3796 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3797 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3798 adev->gfx.config.rb_config[i][j].raster_config =
3799 RREG32(mmPA_SC_RASTER_CONFIG);
3800 adev->gfx.config.rb_config[i][j].raster_config_1 =
3801 RREG32(mmPA_SC_RASTER_CONFIG_1);
3802 }
3803 }
3804 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3805	mutex_unlock(&adev->grbm_idx_mutex);
3806}
3807
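/*
 * Example: with 2 shader engines, 1 SH each and 4 RBs per SH
 * (rb_bitmap_width_per_sh = 4), a fully populated chip ends up with
 * backend_enable_mask = 0xff and num_rbs = 8; any cleared bit marks a
 * harvested RB and routes setup through the harvested raster config path.
 */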
3808	/**
3809	 * gfx_v8_0_init_compute_vmid - initialize the compute VMID SH_MEM registers
3810	 *
3811	 * @adev: amdgpu_device pointer
3812	 *
3813	 * Initialize compute vmid sh_mem registers
3814	 *
3815	 */
3816	#define DEFAULT_SH_MEM_BASES	(0x6000)
3817	#define FIRST_COMPUTE_VMID	(8)
3818	#define LAST_COMPUTE_VMID	(16)
3819	static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3820{
3821 int i;
3822 uint32_t sh_mem_config;
3823 uint32_t sh_mem_bases;
3824
3825 /*
3826 * Configure apertures:
3827 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3828 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3829 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3830 */
3831 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3832
3833 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3834 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3835 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3836 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3837 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3838 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3839
3840 mutex_lock(&adev->srbm_mutex);
3841 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3842 vi_srbm_select(adev, 0, 0, 0, i);
3843 /* CP and shaders */
3844 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3845 WREG32(mmSH_MEM_APE1_BASE, 1);
3846 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3847 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3848 }
3849 vi_srbm_select(adev, 0, 0, 0, 0);
3850 mutex_unlock(&adev->srbm_mutex);
3851}
3852
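/*
 * The two halves of SH_MEM_BASES hold bits 63:48 of the private and shared
 * aperture bases, so DEFAULT_SH_MEM_BASES = 0x6000 in both halves places
 * the apertures at 0x60000000'00000000, matching the layout sketched in
 * the comment above.
 */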
3853static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3854{
3855 switch (adev->asic_type) {
3856 default:
3857 adev->gfx.config.double_offchip_lds_buf = 1;
3858 break;
3859 case CHIP_CARRIZO:
3860 case CHIP_STONEY:
3861 adev->gfx.config.double_offchip_lds_buf = 0;
3862 break;
3863 }
3864}
3865
3866static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3867{
3868	u32 tmp, sh_static_mem_cfg;
3869 int i;
3870
3871	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3872	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3873	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3874	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3875
3876 gfx_v8_0_tiling_mode_table_init(adev);
3877	gfx_v8_0_setup_rb(adev);
3878	gfx_v8_0_get_cu_info(adev);
3879	gfx_v8_0_config_init(adev);
3880
3881 /* XXX SH_MEM regs */
3882 /* where to put LDS, scratch, GPUVM in FSA64 space */
3883 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3884 SWIZZLE_ENABLE, 1);
3885 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3886 ELEMENT_SIZE, 1);
3887 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3888 INDEX_STRIDE, 3);
3889	mutex_lock(&adev->srbm_mutex);
3890	for (i = 0; i < adev->vm_manager.num_ids; i++) {
3891 vi_srbm_select(adev, 0, 0, 0, i);
3892 /* CP and shaders */
3893 if (i == 0) {
3894 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3895 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3896	tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3897	SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3898	WREG32(mmSH_MEM_CONFIG, tmp);
3899	WREG32(mmSH_MEM_BASES, 0);
3900 } else {
3901 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3902	tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3903	tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3904	SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3905	WREG32(mmSH_MEM_CONFIG, tmp);
3906	tmp = adev->mc.shared_aperture_start >> 48;
3907	WREG32(mmSH_MEM_BASES, tmp);
3908 }
3909
3910 WREG32(mmSH_MEM_APE1_BASE, 1);
3911 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3912	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3913 }
3914 vi_srbm_select(adev, 0, 0, 0, 0);
3915 mutex_unlock(&adev->srbm_mutex);
3916
3917	gfx_v8_0_init_compute_vmid(adev);
3918
3919 mutex_lock(&adev->grbm_idx_mutex);
3920 /*
3921 * making sure that the following register writes will be broadcasted
3922 * to all the shaders
3923 */
3924	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3925
3926 WREG32(mmPA_SC_FIFO_SIZE,
3927 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3928 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3929 (adev->gfx.config.sc_prim_fifo_size_backend <<
3930 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3931 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3932 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3933 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3934 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3935
3936 tmp = RREG32(mmSPI_ARB_PRIORITY);
3937 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3938 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3939 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3940 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3941 WREG32(mmSPI_ARB_PRIORITY, tmp);
3942
3943 mutex_unlock(&adev->grbm_idx_mutex);
3944
3945}
3946
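/*
 * Note the split in the per-VMID loop above: VMID 0 (kernel mappings) uses
 * MTYPE_UC with SH_MEM_BASES = 0, while VMIDs 1..num_ids-1 use MTYPE_NC and
 * point SH_MEM_BASES at the shared aperture; srbm_mutex serializes the
 * banked SRBM register writes.
 */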
3947static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3948{
3949 u32 i, j, k;
3950 u32 mask;
3951
3952 mutex_lock(&adev->grbm_idx_mutex);
3953 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3954 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3955	gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3956 for (k = 0; k < adev->usec_timeout; k++) {
3957 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3958 break;
3959 udelay(1);
3960 }
3961 }
3962 }
3963	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3964 mutex_unlock(&adev->grbm_idx_mutex);
3965
3966 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3967 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3968 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3969 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3970 for (k = 0; k < adev->usec_timeout; k++) {
3971 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3972 break;
3973 udelay(1);
3974 }
3975}
3976
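/*
 * Both waits poll busy flags in 1 us steps up to adev->usec_timeout: first
 * RLC_SERDES_CU_MASTER_BUSY per SE/SH under the GRBM index mutex, then the
 * global SE/GC/TC0/TC1 non-CU master bits in a single status register.
 */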
3977static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3978 bool enable)
3979{
3980 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3981
3982 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3983 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3984 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3985 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3986
3987 WREG32(mmCP_INT_CNTL_RING0, tmp);
3988}
3989
3990static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3991{
3992 /* csib */
3993 WREG32(mmRLC_CSIB_ADDR_HI,
3994 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3995 WREG32(mmRLC_CSIB_ADDR_LO,
3996 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3997 WREG32(mmRLC_CSIB_LENGTH,
3998 adev->gfx.rlc.clear_state_size);
3999}
4000
4001static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
4002 int ind_offset,
4003 int list_size,
4004 int *unique_indices,
4005 int *indices_count,
4006 int max_indices,
4007 int *ind_start_offsets,
4008 int *offset_count,
4009 int max_offset)
4010{
4011 int indices;
4012 bool new_entry = true;
4013
4014 for (; ind_offset < list_size; ind_offset++) {
4015
4016 if (new_entry) {
4017 new_entry = false;
4018 ind_start_offsets[*offset_count] = ind_offset;
4019 *offset_count = *offset_count + 1;
4020 BUG_ON(*offset_count >= max_offset);
4021 }
4022
4023 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
4024 new_entry = true;
4025 continue;
4026 }
4027
4028 ind_offset += 2;
4029
4030	/* look for a matching index */
4031 for (indices = 0;
4032 indices < *indices_count;
4033 indices++) {
4034 if (unique_indices[indices] ==
4035 register_list_format[ind_offset])
4036 break;
4037 }
4038
4039 if (indices >= *indices_count) {
4040 unique_indices[*indices_count] =
4041 register_list_format[ind_offset];
4042 indices = *indices_count;
4043 *indices_count = *indices_count + 1;
4044 BUG_ON(*indices_count >= max_indices);
4045 }
4046
4047 register_list_format[ind_offset] = indices;
4048 }
4049}
4050
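/*
 * Sketch of the list format as this parser consumes it: the firmware blob
 * is a stream of entries delimited by 0xFFFFFFFF markers, and the dword at
 * offset +2 inside an entry is an indexed register value. The parser notes
 * where each entry starts, gathers the distinct values into
 * unique_indices[], and rewrites the +2 slot with the small array index so
 * the RLC can resolve it through the RLC_SRM_INDEX_CNTL_* registers later.
 */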
4051static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4052{
4053 int i, temp, data;
4054 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4055 int indices_count = 0;
4056 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4057 int offset_count = 0;
4058
4059 int list_size;
4060 unsigned int *register_list_format =
4061 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4062	if (!register_list_format)
4063 return -ENOMEM;
4064 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4065 adev->gfx.rlc.reg_list_format_size_bytes);
4066
4067 gfx_v8_0_parse_ind_reg_list(register_list_format,
4068 RLC_FormatDirectRegListLength,
4069 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4070 unique_indices,
4071 &indices_count,
4072 sizeof(unique_indices) / sizeof(int),
4073 indirect_start_offsets,
4074 &offset_count,
4075 sizeof(indirect_start_offsets)/sizeof(int));
4076
4077 /* save and restore list */
4078	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4079
4080 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4081 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4082 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4083
4084 /* indirect list */
4085 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4086 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4087 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4088
4089 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4090 list_size = list_size >> 1;
4091 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4092 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4093
4094 /* starting offsets starts */
4095 WREG32(mmRLC_GPM_SCRATCH_ADDR,
4096 adev->gfx.rlc.starting_offsets_start);
4097 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
4098 WREG32(mmRLC_GPM_SCRATCH_DATA,
4099 indirect_start_offsets[i]);
4100
4101 /* unique indices */
4102 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4103 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4104 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
4105	if (unique_indices[i] != 0) {
4106	WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4107	WREG32(data + i, unique_indices[i] >> 20);
4108	}
4109 }
4110 kfree(register_list_format);
4111
4112 return 0;
4113}
4114
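/*
 * Each nonzero unique_indices[] entry is split across an ADDR/DATA register
 * pair: the low 18 bits (& 0x3FFFF) appear to carry the register offset and
 * the bits above 20 the associated data; the exact packing is defined by
 * the RLC firmware list format, not by this driver.
 */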
4115static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4116{
4117	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4118}
4119
4120	static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4121{
4122 uint32_t data;
4123
4124 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4125
4126 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4127 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4128 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4129 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4130 WREG32(mmRLC_PG_DELAY, data);
4131
4132 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4133 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4134
4135}
4136
4137static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4138 bool enable)
4139{
4140	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4141}
4142
4143static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4144 bool enable)
4145{
4146	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4147}
4148
4149static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4150{
4151	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4152}
4153
4154static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4155{
4156 if ((adev->asic_type == CHIP_CARRIZO) ||
4157 (adev->asic_type == CHIP_STONEY)) {
4158 gfx_v8_0_init_csb(adev);
4159 gfx_v8_0_init_save_restore_list(adev);
4160 gfx_v8_0_enable_save_restore_machine(adev);
4161 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4162 gfx_v8_0_init_power_gating(adev);
4163 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4164 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4165 (adev->asic_type == CHIP_POLARIS12)) {
4166 gfx_v8_0_init_csb(adev);
4167 gfx_v8_0_init_save_restore_list(adev);
4168 gfx_v8_0_enable_save_restore_machine(adev);
4169 gfx_v8_0_init_power_gating(adev);
4170	}
4171
4172}
4173
4174	static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4175	{
4176	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4177
4178 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4179 gfx_v8_0_wait_for_rlc_serdes(adev);
4180}
4181
4182static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4183{
4184	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4185	udelay(50);
4186
4187 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4188 udelay(50);
4189}
4190
4191static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4192{
4193	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4194
4195	/* on APUs such as Carrizo, the CP interrupt is enabled only after the CP is initialized */
4196	if (!(adev->flags & AMD_IS_APU))
4197 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4198
4199 udelay(50);
4200}
4201
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}

static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}

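/* Legacy load of the three gfx CP engines (PFP, CE, ME): each image is
 * streamed dword by dword through its UCODE_DATA register while the CP
 * is halted.
 */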
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}

static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

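/* Bring up the gfx CP: program the basic CP registers, then emit the
 * clear-state preamble (sized by gfx_v8_0_get_csb_size() above) along
 * with the per-ASIC PA_SC_RASTER_CONFIG values and the CE partition
 * setup.
 */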
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
					PACKET3(PACKET3_SET_CONTEXT_REG,
						ext->reg_count));
				amdgpu_ring_write(ring,
					ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}

static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}

static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
	}
	udelay(50);
}

static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}

static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj) {
			r = amdgpu_bo_reserve(ring->mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);

			amdgpu_bo_unpin(ring->mqd_obj);
			amdgpu_bo_unreserve(ring->mqd_obj);

			amdgpu_bo_unref(&ring->mqd_obj);
			ring->mqd_obj = NULL;
			ring->mqd_ptr = NULL;
			ring->mqd_gpu_addr = 0;
		}
	}
}

/* KIQ functions */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}

static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
{
	amdgpu_ring_alloc(ring, 8);
	/* set resources */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(ring, 0x000000FF);	/* queue mask lo */
	amdgpu_ring_write(ring, 0);	/* queue mask hi */
	amdgpu_ring_write(ring, 0);	/* gws mask lo */
	amdgpu_ring_write(ring, 0);	/* gws mask hi */
	amdgpu_ring_write(ring, 0);	/* oac mask */
	amdgpu_ring_write(ring, 0);	/* gds heap base:0, gds heap size:0 */
	amdgpu_ring_commit(ring);
	udelay(50);
}

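/* Ask the KIQ to map a compute queue on our behalf: the MAP_QUEUES
 * packet carries the queue's doorbell offset, MQD address and wptr
 * polling address.
 */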
static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr, wptr_addr;

	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	amdgpu_ring_alloc(kiq_ring, 8);

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
	amdgpu_ring_write(kiq_ring, 0x21010000);
	amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
			  (ring->queue << 26) |
			  (ring->pipe << 29) |
			  ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	amdgpu_ring_commit(kiq_ring);
	udelay(50);
}

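/* Fill the memory queue descriptor (MQD) for a compute queue. The MQD
 * mirrors the CP_HQD_* registers; the hardware (or the KIQ on behalf
 * of the driver) loads it to instantiate the hardware queue.
 */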
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring,
			     struct vi_mqd *mqd,
			     uint64_t eop_gpu_addr)
{
	struct amdgpu_device *adev = ring->adev;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	eop_base_addr = eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			    (order_base_2(MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);

	if (ring->use_doorbell)
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	else
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}

static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring,
				      struct vi_mqd *mqd)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp;
	int j;

	/* disable wptr polling */
	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active */
	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
	}

	/* set the pointer to the MQD */
	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
	       mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
	       mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		if ((adev->asic_type == CHIP_CARRIZO) ||
		    (adev->asic_type == CHIP_FIJI) ||
		    (adev->asic_type == CHIP_STONEY)) {
			WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
			       AMDGPU_DOORBELL_KIQ << 2);
			WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
			       AMDGPU_DOORBELL_MEC_RING7 << 2);
		}
	}
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);

	/* set the vmid for the queue */
	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	return 0;
}

static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
				   struct vi_mqd *mqd)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	uint64_t eop_gpu_addr;
	bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	if (is_kiq) {
		eop_gpu_addr = kiq->eop_gpu_addr;
		gfx_v8_0_kiq_setting(&kiq->ring);
	} else {
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
			ring->queue * MEC_HPD_SIZE;
		mqd_idx = ring - &adev->gfx.compute_ring[0];
	}

	if (!adev->gfx.in_reset) {
		memset((void *)mqd, 0, sizeof(*mqd));
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring, mqd, eop_gpu_addr);
		if (is_kiq)
			gfx_v8_0_kiq_init_register(ring, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	} else { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);

		if (is_kiq) {
			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_kiq_init_register(ring, mqd);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
	}

	if (is_kiq)
		gfx_v8_0_kiq_enable(ring);
	else
		gfx_v8_0_map_queue_enable(&kiq->ring, ring);

	return 0;
}

static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring,
					    (struct vi_mqd *)ring->mqd_ptr);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kiq_init_queue(ring,
						    (struct vi_mqd *)ring->mqd_ptr);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}

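/* Bare-metal compute bring-up: build each ring's MQD and program the
 * HQD registers directly through the SRBM, instead of going through
 * KIQ MAP_QUEUES as in the SR-IOV path above.
 */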
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the queues. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr = 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
				    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10) ||
			    (adev->asic_type == CHIP_POLARIS12)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
		    adev->asic_type == CHIP_POLARIS11 ||
		    adev->asic_type == CHIP_POLARIS10 ||
		    adev->asic_type == CHIP_POLARIS12) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}

static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
								AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	if (amdgpu_sriov_vf(adev))
		r = gfx_v8_0_kiq_resume(adev);
	else
		r = gfx_v8_0_cp_compute_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}

static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}

static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}

static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	amdgpu_set_powergating_state(adev,
				     AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}

static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_fini(adev);
}

static int gfx_v8_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_init(adev);
}

static bool gfx_v8_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}

static int gfx_v8_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_is_idle(handle))
			return 0;

		udelay(1);
	}
	return -ETIMEDOUT;
}

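/* Inspect the GRBM/SRBM status registers and record which blocks need
 * a soft reset; the masks saved here are consumed by the pre/post
 * soft-reset callbacks below.
 */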
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}

static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int i;

	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		u32 tmp;

		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
				    DEQUEUE_REQ, 2);
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
	}
}

static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_inactive_hqd(adev, ring);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}

static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}

static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
}

static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_init_hqd(adev, ring);
		}
		gfx_v8_0_cp_compute_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}

/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}

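/* Emit WRITE_DATA packets that program the GDS, GWS and OA allocations
 * for the given VMID.
 */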
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}

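/* SQ indirect register access: select a wave register via SQ_IND_INDEX
 * (wave/SIMD/address plus FORCE_READ), then read it back through
 * SQ_IND_DATA. Used by the wave debug interface below.
 */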
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
	       (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
	       (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
	       (address << SQ_IND_INDEX__INDEX__SHIFT) |
	       (SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32(mmSQ_IND_INDEX,
	       (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
	       (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
	       (regno << SQ_IND_INDEX__INDEX__SHIFT) |
	       (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
	       (SQ_IND_INDEX__FORCE_READ_MASK) |
	       (SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32(mmSQ_IND_DATA);
}

static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};

static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}

static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
				     AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}

static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12))
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							 bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}

2c547165
AD
5836static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5837 bool enable)
5838{
61cb8cef 5839 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5840}
5841
5842static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5843 bool enable)
5844{
61cb8cef 5845 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5846
5847 /* Read any GFX register to wake up GFX. */
5848 if (!enable)
61cb8cef 5849 RREG32(mmDB_RENDER_CONTROL);
5850}
5851
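/* Ordering note for the helper below: CG power gating is enabled before
 * pipeline power gating, and both are torn down together whenever GFX PG
 * is not requested. */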
5852static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5853 bool enable)
5854{
5855 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5856 cz_enable_gfx_cg_power_gating(adev, true);
5857 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5858 cz_enable_gfx_pipeline_power_gating(adev, true);
5859 } else {
5860 cz_enable_gfx_cg_power_gating(adev, false);
5861 cz_enable_gfx_pipeline_power_gating(adev, false);
5862 }
5863}
5864
5fc3aeeb 5865static int gfx_v8_0_set_powergating_state(void *handle,
5866 enum amd_powergating_state state)
aaa36a97 5867{
62a86fc2 5868 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7e913664 5869 bool enable = (state == AMD_PG_STATE_GATE);
62a86fc2 5870
5871 if (amdgpu_sriov_vf(adev))
5872 return 0;
5873
62a86fc2 5874 switch (adev->asic_type) {
5875 case CHIP_CARRIZO:
5876 case CHIP_STONEY:
ad1830d5 5877
5878 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5879 cz_enable_sck_slow_down_on_power_up(adev, true);
5880 cz_enable_sck_slow_down_on_power_down(adev, true);
5881 } else {
5882 cz_enable_sck_slow_down_on_power_up(adev, false);
5883 cz_enable_sck_slow_down_on_power_down(adev, false);
5884 }
5885 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5886 cz_enable_cp_power_gating(adev, true);
5887 else
5888 cz_enable_cp_power_gating(adev, false);
5889
ad1830d5 5890 cz_update_gfx_cg_power_gating(adev, enable);
5891
5892 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5893 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5894 else
5895 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5896
5897 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5898 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5899 else
5900 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5901 break;
2cc0c0b5 5902 case CHIP_POLARIS11:
c4642a47 5903 case CHIP_POLARIS12:
5904 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5905 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5906 else
5907 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5908
5909 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5910 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5911 else
5912 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5913
5914 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5915 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
62a86fc2 5916 else
7ba0eb6d 5917 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5918 break;
5919 default:
5920 break;
5921 }
5922
5923 return 0;
5924}
5925
5926static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5927{
5928 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5929 int data;
5930
5931 if (amdgpu_sriov_vf(adev))
5932 *flags = 0;
5933
5934 /* AMD_CG_SUPPORT_GFX_MGCG */
5935 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5936 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5937 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5938
 5939 /* AMD_CG_SUPPORT_GFX_CGCG */
5940 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5941 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5942 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5943
5944 /* AMD_CG_SUPPORT_GFX_CGLS */
5945 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5946 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5947
5948 /* AMD_CG_SUPPORT_GFX_CGTS */
5949 data = RREG32(mmCGTS_SM_CTRL_REG);
5950 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5951 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5952
5953 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5954 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5955 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5956
5957 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5958 data = RREG32(mmRLC_MEM_SLP_CNTL);
5959 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5960 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5961
5962 /* AMD_CG_SUPPORT_GFX_CP_LS */
5963 data = RREG32(mmCP_MEM_SLP_CNTL);
5964 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5965 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5966}
5967
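A hedged sketch of how a caller might consume the bitmask filled in above (the AMD_CG_* flags are the ones tested in the function; the reporting style is illustrative only):

static void example_report_gfx_cg(void *handle)
{
	u32 flags = 0;

	gfx_v8_0_get_clockgating_state(handle, &flags);
	if (flags & AMD_CG_SUPPORT_GFX_MGCG)
		DRM_INFO("GFX MGCG active\n");
	if (flags & AMD_CG_SUPPORT_GFX_CGCG)
		DRM_INFO("GFX CGCG active\n");
	if (flags & AMD_CG_SUPPORT_GFX_CGLS)
		DRM_INFO("GFX CGLS active\n");
}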
79deaaf4 5968static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
14698b6c 5969 uint32_t reg_addr, uint32_t cmd)
5970{
5971 uint32_t data;
5972
9559ef5b 5973 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5974
5975 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5976 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5977
5978 data = RREG32(mmRLC_SERDES_WR_CTRL);
146f256f 5979 if (adev->asic_type == CHIP_STONEY)
5980 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5981 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5982 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5983 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5984 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5985 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5986 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5987 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5988 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5989 else
5990 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5991 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5992 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5993 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5994 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5995 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5996 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5997 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5998 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5999 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
6000 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
6e378858 6001 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
6002 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
6003 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
6004 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
6005
6006 WREG32(mmRLC_SERDES_WR_CTRL, data);
6007}
6008
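Usage sketch for the helper above, exactly as the clock-gating code later in this file invokes it: a BPM serdes command pairs one of the BPM_REG_* addresses with SET_BPM_SERDES_CMD or CLE_BPM_SERDES_CMD, e.g. clearing the MGCG override on all CUs:

	gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);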
6009#define MSG_ENTER_RLC_SAFE_MODE 1
6010#define MSG_EXIT_RLC_SAFE_MODE 0
6011#define RLC_GPR_REG2__REQ_MASK 0x00000001
6012#define RLC_GPR_REG2__REQ__SHIFT 0
6013#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
6014#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
dbff57bc 6015
6016static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
6017{
6018 u32 data;
6019 unsigned i;
6020
6021 data = RREG32(mmRLC_CNTL);
6022 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
6023 return;
6024
6025 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
6026 data |= RLC_SAFE_MODE__CMD_MASK;
6027 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
6028 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
6029 WREG32(mmRLC_SAFE_MODE, data);
6030
6031 for (i = 0; i < adev->usec_timeout; i++) {
6032 if ((RREG32(mmRLC_GPM_STAT) &
6033 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
6034 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
6035 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
6036 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
6037 break;
6038 udelay(1);
6039 }
6040
6041 for (i = 0; i < adev->usec_timeout; i++) {
61cb8cef 6042 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
6043 break;
6044 udelay(1);
6045 }
6046 adev->gfx.rlc.in_safe_mode = true;
6047 }
6048}
6049
6050static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
6051{
6052 u32 data = 0;
6053 unsigned i;
6054
6055 data = RREG32(mmRLC_CNTL);
6056 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
6057 return;
6058
6059 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
6060 if (adev->gfx.rlc.in_safe_mode) {
6061 data |= RLC_SAFE_MODE__CMD_MASK;
6062 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
6063 WREG32(mmRLC_SAFE_MODE, data);
6064 adev->gfx.rlc.in_safe_mode = false;
6065 }
6066 }
6067
6068 for (i = 0; i < adev->usec_timeout; i++) {
61cb8cef 6069 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
6070 break;
6071 udelay(1);
6072 }
6073}
6074
6075static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
6076 .enter_safe_mode = iceland_enter_rlc_safe_mode,
6077 .exit_safe_mode = iceland_exit_rlc_safe_mode
6078};
6079
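The intended call pattern for the hooks above, mirrored by the clock-gating updates that follow: every RLC-sensitive register sequence is bracketed by enter/exit so the RLC is parked while CG/PG state changes:

	adev->gfx.rlc.funcs->enter_safe_mode(adev);
	/* ... program CG/PG registers while the RLC is parked ... */
	adev->gfx.rlc.funcs->exit_safe_mode(adev);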
6080static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
6081 bool enable)
6082{
6083 uint32_t temp, data;
6084
6085 adev->gfx.rlc.funcs->enter_safe_mode(adev);
6086
6e378858 6087 /* It is disabled by HW by default */
6088 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
6089 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
61cb8cef 6090 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
14698b6c 6091 /* 1 - RLC memory Light sleep */
61cb8cef 6092 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
6e378858 6093
6094 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
6095 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
14698b6c 6096 }
6097
6098 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
6099 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6100 if (adev->flags & AMD_IS_APU)
6101 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6102 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6103 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
6104 else
6105 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6106 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6107 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
6108 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
6109
6110 if (temp != data)
6111 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
6112
6113 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6114 gfx_v8_0_wait_for_rlc_serdes(adev);
6115
6116 /* 5 - clear mgcg override */
79deaaf4 6117 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6e378858 6118
6119 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
6120 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
6121 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
6122 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
6123 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
6124 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
6125 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
6126 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
6127 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
6128 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
6129 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
6130 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
6131 if (temp != data)
6132 WREG32(mmCGTS_SM_CTRL_REG, data);
6133 }
6134 udelay(50);
6135
6136 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6137 gfx_v8_0_wait_for_rlc_serdes(adev);
6138 } else {
6139 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
6140 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6141 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6142 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6143 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
6144 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
6145 if (temp != data)
6146 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
6147
6148 /* 2 - disable MGLS in RLC */
6149 data = RREG32(mmRLC_MEM_SLP_CNTL);
6150 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
6151 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
6152 WREG32(mmRLC_MEM_SLP_CNTL, data);
6153 }
6154
6155 /* 3 - disable MGLS in CP */
6156 data = RREG32(mmCP_MEM_SLP_CNTL);
6157 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
6158 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
6159 WREG32(mmCP_MEM_SLP_CNTL, data);
6160 }
6161
6162 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
6163 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
6164 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
6165 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
6166 if (temp != data)
6167 WREG32(mmCGTS_SM_CTRL_REG, data);
6168
6169 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6170 gfx_v8_0_wait_for_rlc_serdes(adev);
6171
6172 /* 6 - set mgcg override */
79deaaf4 6173 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6174
6175 udelay(50);
6176
6177 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6178 gfx_v8_0_wait_for_rlc_serdes(adev);
6179 }
6180
6181 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6182}
6183
6184static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
6185 bool enable)
6186{
6187 uint32_t temp, temp1, data, data1;
6188
6189 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
6190
6191 adev->gfx.rlc.funcs->enter_safe_mode(adev);
6192
14698b6c 6193 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
6194 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6195 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
6196 if (temp1 != data1)
6197 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6198
dd31ae9a 6199 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6200 gfx_v8_0_wait_for_rlc_serdes(adev);
6201
dd31ae9a 6202 /* 2 - clear cgcg override */
79deaaf4 6203 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6204
6205 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6206 gfx_v8_0_wait_for_rlc_serdes(adev);
6207
dd31ae9a 6208 /* 3 - write cmd to set CGLS */
79deaaf4 6209 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
6e378858 6210
dd31ae9a 6211 /* 4 - enable cgcg */
6212 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
6213
6214 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6215 /* enable cgls*/
6216 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6e378858 6217
6218 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6219 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
6e378858 6220
6221 if (temp1 != data1)
6222 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6223 } else {
6224 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6225 }
6226
6227 if (temp != data)
6228 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6229
 6230 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
6231 * Cmp_busy/GFX_Idle interrupts
6232 */
6233 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6234 } else {
6235 /* disable cntx_empty_int_enable & GFX Idle interrupt */
6236 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
6237
6238 /* TEST CGCG */
6239 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6240 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
6241 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
6242 if (temp1 != data1)
6243 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6244
6245 /* read gfx register to wake up cgcg */
6246 RREG32(mmCB_CGTT_SCLK_CTRL);
6247 RREG32(mmCB_CGTT_SCLK_CTRL);
6248 RREG32(mmCB_CGTT_SCLK_CTRL);
6249 RREG32(mmCB_CGTT_SCLK_CTRL);
6250
6251 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6252 gfx_v8_0_wait_for_rlc_serdes(adev);
6253
 6254 /* write cmd to set CGCG override */
79deaaf4 6255 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6256
6257 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6258 gfx_v8_0_wait_for_rlc_serdes(adev);
6259
6260 /* write cmd to Clear CGLS */
79deaaf4 6261 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6262
6263 /* disable cgcg, cgls should be disabled too. */
6264 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
14698b6c 6265 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6266 if (temp != data)
6267 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6268 }
dbff57bc 6269
6270 gfx_v8_0_wait_for_rlc_serdes(adev);
6271
dbff57bc 6272 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6e378858 6273}
6274static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6275 bool enable)
6276{
6277 if (enable) {
6278 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6279 * === MGCG + MGLS + TS(CG/LS) ===
6280 */
6281 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6282 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6283 } else {
6284 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6285 * === CGCG + CGLS ===
6286 */
6287 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6288 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6289 }
6290 return 0;
6291}
6292
6293static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6294 enum amd_clockgating_state state)
6295{
6296 uint32_t msg_id, pp_state = 0;
6297 uint32_t pp_support_state = 0;
6298 void *pp_handle = adev->powerplay.pp_handle;
6299
6300 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6301 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6302 pp_support_state = PP_STATE_SUPPORT_LS;
6303 pp_state = PP_STATE_LS;
6304 }
6305 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6306 pp_support_state |= PP_STATE_SUPPORT_CG;
6307 pp_state |= PP_STATE_CG;
6308 }
6309 if (state == AMD_CG_STATE_UNGATE)
6310 pp_state = 0;
6311
6312 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6313 PP_BLOCK_GFX_CG,
6314 pp_support_state,
6315 pp_state);
6316 amd_set_clockgating_by_smu(pp_handle, msg_id);
6317 }
a8ca3413 6318
6319 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6320 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6321 pp_support_state = PP_STATE_SUPPORT_LS;
6322 pp_state = PP_STATE_LS;
6323 }
a8ca3413 6324
6325 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6326 pp_support_state |= PP_STATE_SUPPORT_CG;
6327 pp_state |= PP_STATE_CG;
6328 }
6329
6330 if (state == AMD_CG_STATE_UNGATE)
6331 pp_state = 0;
6332
6333 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6334 PP_BLOCK_GFX_MG,
6335 pp_support_state,
6336 pp_state);
6337 amd_set_clockgating_by_smu(pp_handle, msg_id);
6338 }
6339
6340 return 0;
6341}
6342
6343static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6344 enum amd_clockgating_state state)
6345{
6346
6347 uint32_t msg_id, pp_state = 0;
6348 uint32_t pp_support_state = 0;
6349 void *pp_handle = adev->powerplay.pp_handle;
6350
6351 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6352 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6353 pp_support_state = PP_STATE_SUPPORT_LS;
6354 pp_state = PP_STATE_LS;
6355 }
6356 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6357 pp_support_state |= PP_STATE_SUPPORT_CG;
6358 pp_state |= PP_STATE_CG;
6359 }
6360 if (state == AMD_CG_STATE_UNGATE)
6361 pp_state = 0;
6362
6363 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6364 PP_BLOCK_GFX_CG,
6365 pp_support_state,
6366 pp_state);
6367 amd_set_clockgating_by_smu(pp_handle, msg_id);
6368 }
a8ca3413 6369
6370 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6371 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6372 pp_support_state = PP_STATE_SUPPORT_LS;
6373 pp_state = PP_STATE_LS;
6374 }
6375 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6376 pp_support_state |= PP_STATE_SUPPORT_CG;
6377 pp_state |= PP_STATE_CG;
6378 }
6379 if (state == AMD_CG_STATE_UNGATE)
6380 pp_state = 0;
6381
6382 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6383 PP_BLOCK_GFX_3D,
6384 pp_support_state,
6385 pp_state);
6386 amd_set_clockgating_by_smu(pp_handle, msg_id);
6387 }
a8ca3413 6388
6389 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6390 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6391 pp_support_state = PP_STATE_SUPPORT_LS;
6392 pp_state = PP_STATE_LS;
6393 }
a8ca3413 6394
6395 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6396 pp_support_state |= PP_STATE_SUPPORT_CG;
6397 pp_state |= PP_STATE_CG;
6398 }
a8ca3413 6399
6400 if (state == AMD_CG_STATE_UNGATE)
6401 pp_state = 0;
a8ca3413 6402
6403 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6404 PP_BLOCK_GFX_MG,
6405 pp_support_state,
6406 pp_state);
6407 amd_set_clockgating_by_smu(pp_handle, msg_id);
6408 }
6409
6410 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6411 pp_support_state = PP_STATE_SUPPORT_LS;
6412
6413 if (state == AMD_CG_STATE_UNGATE)
6414 pp_state = 0;
6415 else
6416 pp_state = PP_STATE_LS;
6417
6418 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6419 PP_BLOCK_GFX_RLC,
6420 pp_support_state,
6421 pp_state);
6422 amd_set_clockgating_by_smu(pp_handle, msg_id);
6423 }
6424
6425 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6426 pp_support_state = PP_STATE_SUPPORT_LS;
6427
6428 if (state == AMD_CG_STATE_UNGATE)
6429 pp_state = 0;
6430 else
6431 pp_state = PP_STATE_LS;
6432 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
a8ca3413 6433 PP_BLOCK_GFX_CP,
8a19e7fa 6434 pp_support_state,
a8ca3413 6435 pp_state);
6436 amd_set_clockgating_by_smu(pp_handle, msg_id);
6437 }
6438
6439 return 0;
6440}
6441
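The per-block pattern repeated in the two functions above (accumulate LS/CG support bits, zero the state on ungate, send one SMU message) could be factored into a single helper; an illustrative sketch, not present in the driver:

static void example_set_block_cg(void *pp_handle, uint32_t cg_flags,
				 uint32_t ls_flag, uint32_t cg_flag,
				 uint32_t block,
				 enum amd_clockgating_state state)
{
	uint32_t pp_support_state = 0, pp_state = 0, msg_id;

	if (cg_flags & ls_flag) {
		pp_support_state = PP_STATE_SUPPORT_LS;
		pp_state = PP_STATE_LS;
	}
	if (cg_flags & cg_flag) {
		pp_support_state |= PP_STATE_SUPPORT_CG;
		pp_state |= PP_STATE_CG;
	}
	if (state == AMD_CG_STATE_UNGATE)
		pp_state = 0;

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, block, pp_support_state, pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);
}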
5fc3aeeb 6442static int gfx_v8_0_set_clockgating_state(void *handle,
6443 enum amd_clockgating_state state)
aaa36a97 6444{
6445 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6446
6447 if (amdgpu_sriov_vf(adev))
6448 return 0;
6449
6450 switch (adev->asic_type) {
6451 case CHIP_FIJI:
6452 case CHIP_CARRIZO:
6453 case CHIP_STONEY:
6454 gfx_v8_0_update_gfx_clock_gating(adev,
7e913664 6455 state == AMD_CG_STATE_GATE);
6e378858 6456 break;
6457 case CHIP_TONGA:
6458 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6459 break;
6460 case CHIP_POLARIS10:
6461 case CHIP_POLARIS11:
739e9fff 6462 case CHIP_POLARIS12:
6463 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6464 break;
6465 default:
6466 break;
6467 }
6468 return 0;
6469}
6470
536fbf94 6471static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
aaa36a97 6472{
5003f278 6473 return ring->adev->wb.wb[ring->rptr_offs];
6474}
6475
536fbf94 6476static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6477{
6478 struct amdgpu_device *adev = ring->adev;
6479
6480 if (ring->use_doorbell)
6481 /* XXX check if swapping is necessary on BE */
5003f278 6482 return ring->adev->wb.wb[ring->wptr_offs];
aaa36a97 6483 else
5003f278 6484 return RREG32(mmCP_RB0_WPTR);
6485}
6486
6487static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6488{
6489 struct amdgpu_device *adev = ring->adev;
6490
6491 if (ring->use_doorbell) {
6492 /* XXX check if swapping is necessary on BE */
6493 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6494 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
aaa36a97 6495 } else {
536fbf94 6496 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6497 (void)RREG32(mmCP_RB0_WPTR);
6498 }
6499}
6500
d2edb07b 6501static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6502{
6503 u32 ref_and_mask, reg_mem_engine;
6504
6505 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6506 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6507 switch (ring->me) {
6508 case 1:
6509 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6510 break;
6511 case 2:
6512 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6513 break;
6514 default:
6515 return;
6516 }
6517 reg_mem_engine = 0;
6518 } else {
6519 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6520 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6521 }
6522
6523 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6524 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6525 WAIT_REG_MEM_FUNCTION(3) | /* == */
6526 reg_mem_engine));
6527 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6528 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6529 amdgpu_ring_write(ring, ref_and_mask);
6530 amdgpu_ring_write(ring, ref_and_mask);
6531 amdgpu_ring_write(ring, 0x20); /* poll interval */
6532}
6533
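A host-side model of what the WAIT_REG_MEM packet above asks the CP to do (operation 1 = write then wait, function 3 = equal): write the reference value to the REQ register, then poll the DONE register until the masked value matches. A minimal sketch with the two registers abstracted as pointers; the real polling happens inside the CP microengine:

#include <stdint.h>

static void example_wait_reg_mem_eq(volatile uint32_t *req,
				    volatile uint32_t *done,
				    uint32_t ref_and_mask)
{
	*req = ref_and_mask;	/* kick the HDP flush */
	while ((*done & ref_and_mask) != ref_and_mask)
		;		/* CP re-polls every 0x20 clocks */
}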
6534static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6535{
6536 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6537 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6538 EVENT_INDEX(4));
6539
6540 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6541 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6542 EVENT_INDEX(0));
6543}
6544
6545
6546static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6547{
6548 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6549 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6550 WRITE_DATA_DST_SEL(0) |
6551 WR_CONFIRM));
6552 amdgpu_ring_write(ring, mmHDP_DEBUG0);
6553 amdgpu_ring_write(ring, 0);
6554 amdgpu_ring_write(ring, 1);
6555
6556}
6557
93323131 6558static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6559 struct amdgpu_ib *ib,
6560 unsigned vm_id, bool ctx_switch)
6561{
6562 u32 header, control = 0;
aaa36a97 6563
de807f81 6564 if (ib->flags & AMDGPU_IB_FLAG_CE)
6565 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6566 else
6567 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6568
d88bf583 6569 control |= ib->length_dw | (vm_id << 24);
6570
6571 amdgpu_ring_write(ring, header);
6572 amdgpu_ring_write(ring,
6573#ifdef __BIG_ENDIAN
6574 (2 << 0) |
6575#endif
6576 (ib->gpu_addr & 0xFFFFFFFC));
6577 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6578 amdgpu_ring_write(ring, control);
6579}
6580
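A worked example of the control dword composed above, compilable standalone: an IB of 16 dwords dispatched under VMID 3 yields 0x03000010 (VMID in bits 31:24, length in the low bits):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t length_dw = 16, vm_id = 3;
	uint32_t control = length_dw | (vm_id << 24);

	assert(control == 0x03000010);
	return 0;
}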
93323131 6581static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6582 struct amdgpu_ib *ib,
6583 unsigned vm_id, bool ctx_switch)
93323131 6584{
33b7ed01 6585 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
93323131 6586
33b7ed01 6587 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
93323131 6588 amdgpu_ring_write(ring,
6589#ifdef __BIG_ENDIAN
62d2ce4b 6590 (2 << 0) |
93323131 6591#endif
62d2ce4b 6592 (ib->gpu_addr & 0xFFFFFFFC));
93323131 6593 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6594 amdgpu_ring_write(ring, control);
6595}
6596
aaa36a97 6597static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
890ee23f 6598 u64 seq, unsigned flags)
aaa36a97 6599{
6600 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6601 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6602
6603 /* EVENT_WRITE_EOP - flush caches, send int */
6604 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6605 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6606 EOP_TC_ACTION_EN |
f84e63f2 6607 EOP_TC_WB_ACTION_EN |
6608 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6609 EVENT_INDEX(5)));
6610 amdgpu_ring_write(ring, addr & 0xfffffffc);
90bea0ab 6611 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
890ee23f 6612 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6613 amdgpu_ring_write(ring, lower_32_bits(seq));
6614 amdgpu_ring_write(ring, upper_32_bits(seq));
22c01cc4 6615
6616}
6617
b8c7b39e 6618static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
aaa36a97 6619{
21cd942e 6620 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5907a0d8 6621 uint32_t seq = ring->fence_drv.sync_seq;
6622 uint64_t addr = ring->fence_drv.gpu_addr;
6623
6624 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6625 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6626 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6627 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6628 amdgpu_ring_write(ring, addr & 0xfffffffc);
6629 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6630 amdgpu_ring_write(ring, seq);
6631 amdgpu_ring_write(ring, 0xffffffff);
6632 amdgpu_ring_write(ring, 4); /* poll interval */
6633}
6634
6635static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6636 unsigned vm_id, uint64_t pd_addr)
6637{
21cd942e 6638 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5c3422b0 6639
6640 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6641 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6642 WRITE_DATA_DST_SEL(0)) |
6643 WR_CONFIRM);
6644 if (vm_id < 8) {
6645 amdgpu_ring_write(ring,
6646 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6647 } else {
6648 amdgpu_ring_write(ring,
6649 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6650 }
6651 amdgpu_ring_write(ring, 0);
6652 amdgpu_ring_write(ring, pd_addr >> 12);
6653
6654 /* bits 0-15 are the VM contexts0-15 */
6655 /* invalidate the cache */
6656 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6657 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6658 WRITE_DATA_DST_SEL(0)));
6659 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6660 amdgpu_ring_write(ring, 0);
6661 amdgpu_ring_write(ring, 1 << vm_id);
6662
6663 /* wait for the invalidate to complete */
6664 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6665 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6666 WAIT_REG_MEM_FUNCTION(0) | /* always */
6667 WAIT_REG_MEM_ENGINE(0))); /* me */
6668 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6669 amdgpu_ring_write(ring, 0);
6670 amdgpu_ring_write(ring, 0); /* ref */
6671 amdgpu_ring_write(ring, 0); /* mask */
6672 amdgpu_ring_write(ring, 0x20); /* poll interval */
6673
6674 /* compute doesn't have PFP */
6675 if (usepfp) {
6676 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6677 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6678 amdgpu_ring_write(ring, 0x0);
 6679 /* GFX8 emits a 128-dword NOP to keep the CE from accessing the VM before the vm_flush finishes */
6680 amdgpu_ring_insert_nop(ring, 128);
6681 }
6682}
6683
536fbf94 6684static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6685{
6686 return ring->adev->wb.wb[ring->wptr_offs];
6687}
6688
6689static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6690{
6691 struct amdgpu_device *adev = ring->adev;
6692
6693 /* XXX check if swapping is necessary on BE */
6694 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6695 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6696}
6697
6698static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6699 u64 addr, u64 seq,
890ee23f 6700 unsigned flags)
aaa36a97 6701{
6702 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6703 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6704
6705 /* RELEASE_MEM - flush caches, send int */
6706 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6707 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6708 EOP_TC_ACTION_EN |
a3d5aaa8 6709 EOP_TC_WB_ACTION_EN |
6710 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6711 EVENT_INDEX(5)));
890ee23f 6712 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6713 amdgpu_ring_write(ring, addr & 0xfffffffc);
6714 amdgpu_ring_write(ring, upper_32_bits(addr));
6715 amdgpu_ring_write(ring, lower_32_bits(seq));
6716 amdgpu_ring_write(ring, upper_32_bits(seq));
6717}
6718
6719static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6720 u64 seq, unsigned int flags)
6721{
 6722 /* we only allocate 32 bits for each seq wb address */
f10b478d 6723 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6724
6725 /* write fence seq to the "addr" */
6726 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6727 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6728 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6729 amdgpu_ring_write(ring, lower_32_bits(addr));
6730 amdgpu_ring_write(ring, upper_32_bits(addr));
6731 amdgpu_ring_write(ring, lower_32_bits(seq));
6732
6733 if (flags & AMDGPU_FENCE_FLAG_INT) {
6734 /* set register to trigger INT */
6735 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6736 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6737 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6738 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6739 amdgpu_ring_write(ring, 0);
6740 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6741 }
6742}
6743
6744static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6745{
6746 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6747 amdgpu_ring_write(ring, 0);
6748}
6749
6750static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6751{
6752 uint32_t dw2 = 0;
6753
6754 if (amdgpu_sriov_vf(ring->adev))
6755 gfx_v8_0_ring_emit_ce_meta_init(ring,
6756 (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
6757
 6758 dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
6759 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
45682886 6760 gfx_v8_0_ring_emit_vgt_flush(ring);
6761 /* set load_global_config & load_global_uconfig */
6762 dw2 |= 0x8001;
6763 /* set load_cs_sh_regs */
6764 dw2 |= 0x01000000;
6765 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6766 dw2 |= 0x10002;
6767
 6768 /* set load_ce_ram if a preamble is presented */
6769 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6770 dw2 |= 0x10000000;
6771 } else {
 6772 /* still load_ce_ram if this is the first time a preamble
 6773 * is presented, even though no context switch happens.
 6774 */
6775 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6776 dw2 |= 0x10000000;
6777 }
6778
6779 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6780 amdgpu_ring_write(ring, dw2);
6781 amdgpu_ring_write(ring, 0);
6782
6783 if (amdgpu_sriov_vf(ring->adev))
6784 gfx_v8_0_ring_emit_de_meta_init(ring,
6785 (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
6786}
6787
6788static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6789{
6790 struct amdgpu_device *adev = ring->adev;
6791
6792 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6793 amdgpu_ring_write(ring, 0 | /* src: register*/
6794 (5 << 8) | /* dst: memory */
6795 (1 << 20)); /* write confirm */
6796 amdgpu_ring_write(ring, reg);
6797 amdgpu_ring_write(ring, 0);
6798 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6799 adev->virt.reg_val_offs * 4));
6800 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6801 adev->virt.reg_val_offs * 4));
6802}
6803
6804static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6805 uint32_t val)
6806{
6807 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6808 amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
6809 amdgpu_ring_write(ring, reg);
6810 amdgpu_ring_write(ring, 0);
6811 amdgpu_ring_write(ring, val);
6812}
6813
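Sketch of the host side of the emit_rreg flow above: once the KIQ has executed the COPY_DATA packet, the register value sits in the writeback page at reg_val_offs. Fencing and waiting are elided; a hypothetical helper:

static u32 example_kiq_read_result(struct amdgpu_device *adev)
{
	/* valid only after the ring has processed the COPY_DATA packet */
	return adev->wb.wb[adev->virt.reg_val_offs];
}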
6814static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6815 enum amdgpu_interrupt_state state)
6816{
6817 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6818 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6819}
6820
6821static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6822 int me, int pipe,
6823 enum amdgpu_interrupt_state state)
6824{
6825 /*
6826 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6827 * handles the setting of interrupts for this specific pipe. All other
6828 * pipes' interrupts are set by amdkfd.
6829 */
6830
6831 if (me == 1) {
6832 switch (pipe) {
6833 case 0:
6834 break;
6835 default:
6836 DRM_DEBUG("invalid pipe %d\n", pipe);
6837 return;
6838 }
6839 } else {
6840 DRM_DEBUG("invalid me %d\n", me);
6841 return;
6842 }
6843
6844 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6845 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6846}
6847
6848static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6849 struct amdgpu_irq_src *source,
6850 unsigned type,
6851 enum amdgpu_interrupt_state state)
6852{
6853 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6854 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6855
6856 return 0;
6857}
6858
6859static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6860 struct amdgpu_irq_src *source,
6861 unsigned type,
6862 enum amdgpu_interrupt_state state)
6863{
6864 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6865 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6866
6867 return 0;
6868}
6869
6870static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6871 struct amdgpu_irq_src *src,
6872 unsigned type,
6873 enum amdgpu_interrupt_state state)
6874{
6875 switch (type) {
6876 case AMDGPU_CP_IRQ_GFX_EOP:
6877 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6878 break;
6879 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6880 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6881 break;
6882 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6883 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6884 break;
6885 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6886 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6887 break;
6888 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6889 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6890 break;
6891 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6892 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6893 break;
6894 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6895 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6896 break;
6897 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6898 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6899 break;
6900 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6901 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6902 break;
6903 default:
6904 break;
6905 }
6906 return 0;
6907}
6908
6909static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6910 struct amdgpu_irq_src *source,
6911 struct amdgpu_iv_entry *entry)
6912{
6913 int i;
6914 u8 me_id, pipe_id, queue_id;
6915 struct amdgpu_ring *ring;
6916
6917 DRM_DEBUG("IH: CP EOP\n");
6918 me_id = (entry->ring_id & 0x0c) >> 2;
6919 pipe_id = (entry->ring_id & 0x03) >> 0;
6920 queue_id = (entry->ring_id & 0x70) >> 4;
6921
6922 switch (me_id) {
6923 case 0:
6924 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6925 break;
6926 case 1:
6927 case 2:
6928 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6929 ring = &adev->gfx.compute_ring[i];
6930 /* Per-queue interrupt is supported for MEC starting from VI.
6931 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6932 */
6933 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6934 amdgpu_fence_process(ring);
6935 }
6936 break;
6937 }
6938 return 0;
6939}
6940
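A worked, standalone example of the ring_id decode above: ring_id 0x25 splits into me 1, pipe 1, queue 2:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint8_t ring_id = 0x25;
	uint8_t me_id = (ring_id & 0x0c) >> 2;
	uint8_t pipe_id = (ring_id & 0x03) >> 0;
	uint8_t queue_id = (ring_id & 0x70) >> 4;

	assert(me_id == 1 && pipe_id == 1 && queue_id == 2);
	return 0;
}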
6941static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6942 struct amdgpu_irq_src *source,
6943 struct amdgpu_iv_entry *entry)
6944{
6945 DRM_ERROR("Illegal register access in command stream\n");
6946 schedule_work(&adev->reset_work);
6947 return 0;
6948}
6949
6950static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6951 struct amdgpu_irq_src *source,
6952 struct amdgpu_iv_entry *entry)
6953{
6954 DRM_ERROR("Illegal instruction in command stream\n");
6955 schedule_work(&adev->reset_work);
6956 return 0;
6957}
6958
6959static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6960 struct amdgpu_irq_src *src,
6961 unsigned int type,
6962 enum amdgpu_interrupt_state state)
6963{
6964 uint32_t tmp, target;
07c397f9 6965 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
4e638ae9 6966
07c397f9 6967 BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
6968
6969 if (ring->me == 1)
6970 target = mmCP_ME1_PIPE0_INT_CNTL;
6971 else
6972 target = mmCP_ME2_PIPE0_INT_CNTL;
6973 target += ring->pipe;
6974
6975 switch (type) {
6976 case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6977 if (state == AMDGPU_IRQ_STATE_DISABLE) {
6978 tmp = RREG32(mmCPC_INT_CNTL);
6979 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6980 GENERIC2_INT_ENABLE, 0);
6981 WREG32(mmCPC_INT_CNTL, tmp);
6982
6983 tmp = RREG32(target);
6984 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6985 GENERIC2_INT_ENABLE, 0);
6986 WREG32(target, tmp);
6987 } else {
6988 tmp = RREG32(mmCPC_INT_CNTL);
6989 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6990 GENERIC2_INT_ENABLE, 1);
6991 WREG32(mmCPC_INT_CNTL, tmp);
6992
6993 tmp = RREG32(target);
6994 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6995 GENERIC2_INT_ENABLE, 1);
6996 WREG32(target, tmp);
6997 }
6998 break;
6999 default:
7000 BUG(); /* kiq only support GENERIC2_INT now */
7001 break;
7002 }
7003 return 0;
7004}
7005
7006static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
7007 struct amdgpu_irq_src *source,
7008 struct amdgpu_iv_entry *entry)
7009{
7010 u8 me_id, pipe_id, queue_id;
07c397f9 7011 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
4e638ae9 7012
07c397f9 7013 BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
7014
7015 me_id = (entry->ring_id & 0x0c) >> 2;
7016 pipe_id = (entry->ring_id & 0x03) >> 0;
7017 queue_id = (entry->ring_id & 0x70) >> 4;
7018 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
7019 me_id, pipe_id, queue_id);
7020
7021 amdgpu_fence_process(ring);
7022 return 0;
7023}
7024
a1255107 7025static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
88a907d6 7026 .name = "gfx_v8_0",
aaa36a97 7027 .early_init = gfx_v8_0_early_init,
ccba7691 7028 .late_init = gfx_v8_0_late_init,
7029 .sw_init = gfx_v8_0_sw_init,
7030 .sw_fini = gfx_v8_0_sw_fini,
7031 .hw_init = gfx_v8_0_hw_init,
7032 .hw_fini = gfx_v8_0_hw_fini,
7033 .suspend = gfx_v8_0_suspend,
7034 .resume = gfx_v8_0_resume,
7035 .is_idle = gfx_v8_0_is_idle,
7036 .wait_for_idle = gfx_v8_0_wait_for_idle,
3d7c6384 7037 .check_soft_reset = gfx_v8_0_check_soft_reset,
1057f20c 7038 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
aaa36a97 7039 .soft_reset = gfx_v8_0_soft_reset,
e4ae0fc3 7040 .post_soft_reset = gfx_v8_0_post_soft_reset,
7041 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
7042 .set_powergating_state = gfx_v8_0_set_powergating_state,
ebd843d6 7043 .get_clockgating_state = gfx_v8_0_get_clockgating_state,
7044};
7045
7046static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
21cd942e 7047 .type = AMDGPU_RING_TYPE_GFX,
7048 .align_mask = 0xff,
7049 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
536fbf94 7050 .support_64bit_ptrs = false,
e7706b42 7051 .get_rptr = gfx_v8_0_ring_get_rptr,
7052 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
7053 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
7054 .emit_frame_size =
7055 20 + /* gfx_v8_0_ring_emit_gds_switch */
7056 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7057 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
7058 6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
7059 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7060 128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
7061 2 + /* gfx_v8_ring_emit_sb */
c2ce92fc 7062 3 + 4 + 29, /* gfx_v8_ring_emit_cntxcntl including vgt flush/meta-data */
e12f3d7a 7063 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
93323131 7064 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
aaa36a97 7065 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
b8c7b39e 7066 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7067 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7068 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
d2edb07b 7069 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
d35db561 7070 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
7071 .test_ring = gfx_v8_0_ring_test_ring,
7072 .test_ib = gfx_v8_0_ring_test_ib,
edff0e28 7073 .insert_nop = amdgpu_ring_insert_nop,
9e5d5309 7074 .pad_ib = amdgpu_ring_generic_pad_ib,
c2167a65 7075 .emit_switch_buffer = gfx_v8_ring_emit_sb,
753ad49c 7076 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
7077};
7078
7079static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
21cd942e 7080 .type = AMDGPU_RING_TYPE_COMPUTE,
7081 .align_mask = 0xff,
7082 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
536fbf94 7083 .support_64bit_ptrs = false,
e7706b42 7084 .get_rptr = gfx_v8_0_ring_get_rptr,
7085 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7086 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7087 .emit_frame_size =
7088 20 + /* gfx_v8_0_ring_emit_gds_switch */
7089 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7090 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
7091 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7092 17 + /* gfx_v8_0_ring_emit_vm_flush */
7093 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7094 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
93323131 7095 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
aaa36a97 7096 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
b8c7b39e 7097 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7098 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7099 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
35074d2d 7100 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
d35db561 7101 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
7102 .test_ring = gfx_v8_0_ring_test_ring,
7103 .test_ib = gfx_v8_0_ring_test_ib,
edff0e28 7104 .insert_nop = amdgpu_ring_insert_nop,
9e5d5309 7105 .pad_ib = amdgpu_ring_generic_pad_ib,
7106};
7107
7108static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7109 .type = AMDGPU_RING_TYPE_KIQ,
7110 .align_mask = 0xff,
7111 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
536fbf94 7112 .support_64bit_ptrs = false,
7113 .get_rptr = gfx_v8_0_ring_get_rptr,
7114 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
7115 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
7116 .emit_frame_size =
7117 20 + /* gfx_v8_0_ring_emit_gds_switch */
7118 7 + /* gfx_v8_0_ring_emit_hdp_flush */
7119 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
7120 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7121 17 + /* gfx_v8_0_ring_emit_vm_flush */
7122 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7123 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
7124 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
7125 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7126 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7127 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
7128 .test_ring = gfx_v8_0_ring_test_ring,
7129 .test_ib = gfx_v8_0_ring_test_ib,
7130 .insert_nop = amdgpu_ring_insert_nop,
7131 .pad_ib = amdgpu_ring_generic_pad_ib,
7132 .emit_rreg = gfx_v8_0_ring_emit_rreg,
7133 .emit_wreg = gfx_v8_0_ring_emit_wreg,
7134};
7135
7136static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7137{
7138 int i;
7139
7140 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7141
7142 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7143 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7144
7145 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7146 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7147}
7148
7149static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7150 .set = gfx_v8_0_set_eop_interrupt_state,
7151 .process = gfx_v8_0_eop_irq,
7152};
7153
7154static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7155 .set = gfx_v8_0_set_priv_reg_fault_state,
7156 .process = gfx_v8_0_priv_reg_irq,
7157};
7158
7159static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7160 .set = gfx_v8_0_set_priv_inst_fault_state,
7161 .process = gfx_v8_0_priv_inst_irq,
7162};
7163
7164static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7165 .set = gfx_v8_0_kiq_set_interrupt_state,
7166 .process = gfx_v8_0_kiq_irq,
7167};
7168
7169static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7170{
7171 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7172 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7173
7174 adev->gfx.priv_reg_irq.num_types = 1;
7175 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7176
7177 adev->gfx.priv_inst_irq.num_types = 1;
7178 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7179
7180 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7181 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7182}
7183
7184static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7185{
ae6a58e4 7186 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7187}
7188
7189static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7190{
 7191 /* init ASIC GDS info */
7192 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7193 adev->gds.gws.total_size = 64;
7194 adev->gds.oa.total_size = 16;
7195
7196 if (adev->gds.mem.total_size == 64 * 1024) {
7197 adev->gds.mem.gfx_partition_size = 4096;
7198 adev->gds.mem.cs_partition_size = 4096;
7199
7200 adev->gds.gws.gfx_partition_size = 4;
7201 adev->gds.gws.cs_partition_size = 4;
7202
7203 adev->gds.oa.gfx_partition_size = 4;
7204 adev->gds.oa.cs_partition_size = 1;
7205 } else {
7206 adev->gds.mem.gfx_partition_size = 1024;
7207 adev->gds.mem.cs_partition_size = 1024;
7208
7209 adev->gds.gws.gfx_partition_size = 16;
7210 adev->gds.gws.cs_partition_size = 16;
7211
7212 adev->gds.oa.gfx_partition_size = 4;
7213 adev->gds.oa.cs_partition_size = 4;
7214 }
7215}
7216
7217static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7218 u32 bitmap)
7219{
7220 u32 data;
7221
7222 if (!bitmap)
7223 return;
7224
7225 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7226 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7227
7228 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7229}
7230
8f8e00c1 7231static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
aaa36a97 7232{
8f8e00c1 7233 u32 data, mask;
aaa36a97 7234
5003f278
TSD
7235 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7236 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
aaa36a97 7237
6157bd7a 7238 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
aaa36a97 7239
5003f278 7240 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
aaa36a97
AD
7241}
7242
7dae69a2 7243static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
aaa36a97
AD
7244{
7245 int i, j, k, counter, active_cu_number = 0;
7246 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7dae69a2 7247 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
9de06de8 7248 unsigned disable_masks[4 * 2];
aaa36a97 7249
7250 memset(cu_info, 0, sizeof(*cu_info));
7251
7252 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7253
7254 mutex_lock(&adev->grbm_idx_mutex);
7255 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7256 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7257 mask = 1;
7258 ao_bitmap = 0;
7259 counter = 0;
9559ef5b 7260 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7261 if (i < 4 && j < 2)
7262 gfx_v8_0_set_user_cu_inactive_bitmap(
7263 adev, disable_masks[i * 2 + j]);
8f8e00c1 7264 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7265 cu_info->bitmap[i][j] = bitmap;
7266
8f8e00c1 7267 for (k = 0; k < 16; k ++) {
7268 if (bitmap & mask) {
7269 if (counter < 2)
7270 ao_bitmap |= mask;
7271 counter ++;
7272 }
7273 mask <<= 1;
7274 }
7275 active_cu_number += counter;
7276 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7277 }
7278 }
9559ef5b 7279 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
8f8e00c1 7280 mutex_unlock(&adev->grbm_idx_mutex);
7281
7282 cu_info->number = active_cu_number;
7283 cu_info->ao_cu_mask = ao_cu_mask;
aaa36a97 7284}
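A worked, standalone example of the always-on CU selection in the loop above: with an active bitmap of 0x1f (five CUs), only the first two set bits land in ao_bitmap:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t bitmap = 0x1f, mask = 1, ao_bitmap = 0;
	int k, counter = 0;

	for (k = 0; k < 16; k++) {
		if (bitmap & mask) {
			if (counter < 2)
				ao_bitmap |= mask;
			counter++;
		}
		mask <<= 1;
	}
	assert(counter == 5 && ao_bitmap == 0x3);
	return 0;
}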
7285
7286const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7287{
7288 .type = AMD_IP_BLOCK_TYPE_GFX,
7289 .major = 8,
7290 .minor = 0,
7291 .rev = 0,
7292 .funcs = &gfx_v8_0_ip_funcs,
7293};
7294
7295const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7296{
7297 .type = AMD_IP_BLOCK_TYPE_GFX,
7298 .major = 8,
7299 .minor = 1,
7300 .rev = 0,
7301 .funcs = &gfx_v8_0_ip_funcs,
7302};
7303
7304static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7305{
7306 uint64_t ce_payload_addr;
7307 int cnt_ce;
7308 static union {
7309 struct vi_ce_ib_state regular;
7310 struct vi_ce_ib_state_chained_ib chained;
e8411302 7311 } ce_payload = {};
7312
7313 if (ring->adev->virt.chained_ib_support) {
49abb980 7314 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7315 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7316 } else {
49abb980 7317 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload);
7318 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7319 }
7320
7321 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7322 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7323 WRITE_DATA_DST_SEL(8) |
7324 WR_CONFIRM) |
7325 WRITE_DATA_CACHE_POLICY(0));
7326 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7327 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7328 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7329}
7330
7331static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7332{
7333 uint64_t de_payload_addr, gds_addr;
7334 int cnt_de;
7335 static union {
49abb980
XY
7336 struct vi_de_ib_state regular;
7337 struct vi_de_ib_state_chained_ib chained;
e8411302 7338 } de_payload = {};
7339
7340 gds_addr = csa_addr + 4096;
7341 if (ring->adev->virt.chained_ib_support) {
7342 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7343 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
49abb980 7344 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7345 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7346 } else {
7347 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7348 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
49abb980 7349 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7350 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7351 }
7352
7353 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7354 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7355 WRITE_DATA_DST_SEL(8) |
7356 WR_CONFIRM) |
7357 WRITE_DATA_CACHE_POLICY(0));
7358 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7359 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7360 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7361}
7362
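/* Note: adev->gfx.mec.mqd_backup[] keeps one CPU-side MQD copy per KCQ at
 * indices 0..num_compute_rings-1, plus one for the KIQ at index
 * AMDGPU_MAX_COMPUTE_RINGS; both allocations appear below. */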
7363/* create MQD for each compute queue */
0875a242 7364static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev)
7365{
7366 struct amdgpu_ring *ring = NULL;
7367 int r, i;
7368
7369 /* create MQD for KIQ */
7370 ring = &adev->gfx.kiq.ring;
7371 if (!ring->mqd_obj) {
7372 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
7373 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7374 &ring->mqd_gpu_addr, &ring->mqd_ptr);
7375 if (r) {
7376 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
7377 return r;
7378 }
7379
7380 /* prepare MQD backup */
7381 adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7382 if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
7383 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
7384 }
7385
7386 /* create MQD for each KCQ */
b0ac2a32 7387 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7388 ring = &adev->gfx.compute_ring[i];
7389 if (!ring->mqd_obj) {
7390 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
7391 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
7392 &ring->mqd_gpu_addr, &ring->mqd_ptr);
7393 if (r) {
7394 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
7395 return r;
7396 }
7397
7398 /* prepare MQD backup */
7399 adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
7400 if (!adev->gfx.mec.mqd_backup[i])
7401 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
7402 }
7403 }
7404
7405 return 0;
7406}
7407
0875a242 7408static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
7409{
7410 struct amdgpu_ring *ring = NULL;
7411 int i;
7412
7413 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
7414 ring = &adev->gfx.compute_ring[i];
24de7515 7415 kfree(adev->gfx.mec.mqd_backup[i]);
7416 amdgpu_bo_free_kernel(&ring->mqd_obj,
7417 &ring->mqd_gpu_addr,
7418 &ring->mqd_ptr);
7419 }
7420
7421 ring = &adev->gfx.kiq.ring;
24de7515 7422 kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
7423 amdgpu_bo_free_kernel(&ring->mqd_obj,
7424 &ring->mqd_gpu_addr,
7425 &ring->mqd_ptr);
7426}