drm/amdgpu: allocate queues horizontally across pipes
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

#define ARRAY_MODE(x)		((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)		((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)		((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)		((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)		((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)		((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)		((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

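/*
 * The golden register tables below are applied with
 * amdgpu_program_register_sequence(), which interprets them as
 * {register, AND mask, OR value} triplets: an all-ones mask writes the
 * value directly, anything else is applied read-modify-write.
 */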
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);

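/*
 * Program the per-ASIC golden register sequences.  Polaris10 additionally
 * sets an SMC clock register and, for a handful of specific board
 * revisions, issues two VBIOS I2C transactions as a board-level quirk.
 */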
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

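/*
 * Ring sanity test: seed a scratch register with 0xCAFEDEAD, emit a
 * SET_UCONFIG_REG packet that rewrites it to 0xDEADBEEF, then poll until
 * the CP has processed the write (or the timeout expires).
 */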
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

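/*
 * IB sanity test: the same scratch-register handshake as the ring test
 * above, but the write is carried in an indirect buffer and completion
 * is detected by waiting on the IB's fence before checking the register.
 */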
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

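/*
 * Fetch and validate all microcode images for the detected ASIC.  The
 * files follow the amdgpu/<chip>_<block>.bin naming scheme; MEC2 is
 * optional and its absence is not treated as an error.
 */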
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released with feature version #46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the CP jump table (JT) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

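/*
 * Emit the PM4 clear-state sequence into the RLC clear-state buffer:
 * preamble begin, context control, the SECT_CONTEXT register extents
 * from vi_cs_data, the raster configuration, preamble end and a final
 * CLEAR_STATE packet.
 */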
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

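/*
 * Copy the jump table of each CP microcode image into the RLC cp_table
 * buffer.  me indices 0..3 map to CE/PFP/ME/MEC; me == 4 (MEC2) only
 * exists on Carrizo, where max_me is bumped to 5.
 */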
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}

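/*
 * Allocate, pin and CPU-map the RLC clear-state buffer in VRAM and fill
 * it via gfx_v8_0_get_csb_buffer().  Carrizo and Stoney additionally
 * need a cp_table buffer holding the CP jump tables plus GDS backup
 * space (the "JT + GDS" allocation below).
 */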
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}

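/*
 * Set up the kernel interrupt queue (KIQ) ring.  The KIQ is placed on
 * MEC2 pipe 0 when MEC2 firmware is present and falls back to MEC1
 * pipe 1 otherwise; it is addressed through a doorbell rather than a
 * write-pointer poll.
 */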
4e638ae9
XY
1381static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
1382 struct amdgpu_ring *ring,
1383 struct amdgpu_irq_src *irq)
1384{
34534610 1385 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4e638ae9
XY
1386 int r = 0;
1387
cdf6adb2
SL
1388 mutex_init(&kiq->ring_mutex);
1389
bffa2280
ML
1390 r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
1391 if (r)
1392 return r;
880e87e3 1393
4e638ae9
XY
1394 ring->adev = NULL;
1395 ring->ring_obj = NULL;
1396 ring->use_doorbell = true;
1397 ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
1398 if (adev->gfx.mec2_fw) {
1399 ring->me = 2;
1400 ring->pipe = 0;
1401 } else {
1402 ring->me = 1;
1403 ring->pipe = 1;
1404 }
1405
4e638ae9 1406 ring->queue = 0;
34534610 1407 ring->eop_gpu_addr = kiq->eop_gpu_addr;
4e638ae9
XY
1408 sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
1409 r = amdgpu_ring_init(adev, ring, 1024,
1410 irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
1411 if (r)
1412 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
1413
1414 return r;
1415}

static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq)
{
	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
	amdgpu_ring_fini(ring);
}
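
/*
 * The flat queue index i decomposes as queue = i % queues_per_pipe,
 * pipe = (i / queues_per_pipe) % pipes_per_mec and
 * mec = i / (queues_per_pipe * pipes_per_mec).  With the GFX8 topology
 * chosen in gfx_v8_0_mec_init() (8 queues/pipe, 4 pipes/MEC), i = 0..7
 * is MEC0 pipe 0, i = 8..15 MEC0 pipe 1, and so on.
 */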
static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev)
{
	int i, queue, pipe, mec;

	/* policy for amdgpu compute queue ownership */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		queue = i % adev->gfx.mec.num_queue_per_pipe;
		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
			% adev->gfx.mec.num_pipe_per_mec;
		mec = (i / adev->gfx.mec.num_queue_per_pipe)
			/ adev->gfx.mec.num_pipe_per_mec;

		/* we've run out of HW */
		if (mec >= adev->gfx.mec.num_mec)
			break;

		/* policy: amdgpu owns all queues in the first pipe */
		if (mec == 0 && pipe == 0)
			set_bit(i, adev->gfx.mec.queue_bitmap);
	}

	/* update the number of active compute rings */
	adev->gfx.num_compute_rings =
		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* If you hit this case and edited the policy, you probably just
	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
}
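
/*
 * Allocate the HPD EOP backing store for the compute micro engines:
 * after the per-ASIC MEC/pipe/queue topology is chosen and queue
 * ownership is settled, one GFX8_MEC_HPD_SIZE slot per enabled compute
 * ring is created in GTT, pinned, and zeroed.
 */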
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_TONGA:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_POLARIS10:
	case CHIP_CARRIZO:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* take ownership of the relevant compute queues */
	gfx_v8_0_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     mec_hpd_size,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
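
/*
 * The KIQ gets its own single EOP slot, allocated through the
 * amdgpu_bo_create_kernel() helper so creation, pinning and CPU
 * mapping happen in one call and are undone by amdgpu_bo_free_kernel().
 */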
static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, GFX8_MEC_HPD_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, GFX8_MEC_HPD_SIZE);

	r = amdgpu_bo_reserve(kiq->eop_obj, true);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}
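
/*
 * Pre-assembled GCN3 compute shaders used by the EDC GPR workaround
 * below.  The dwords appear to be v_mov_b32/s_mov_b32 sequences that
 * write across the VGPR and SGPR files so the register banks start from
 * a known state, terminated by s_barrier (0xbf8a0000) and s_endpgm
 * (0xbf810000); treat this decoding as informational, not authoritative.
 */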
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
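
/*
 * Register/value pairs consumed two at a time by
 * gfx_v8_0_do_edc_gpr_workarounds() and emitted as SET_SH_REG packets
 * ahead of each dispatch; COMPUTE_STATIC_THREAD_MGMT_SE0 is the CU mask
 * selecting which compute units each pass targets (all CUs for the VGPR
 * pass, the low and high halves for the two SGPR passes).
 */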
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
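
/*
 * EDC (error detect and correct) GPR workaround for Carrizo: build a
 * single indirect buffer that dispatches the VGPR shader and both SGPR
 * shaders, wait for it to retire, then enable DED/PROP_FED reporting in
 * GB_EDC_MODE and read back the SEC/DED counters to clear them.
 */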
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
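
/*
 * Derive the gfx config (shader engine/CU/RB counts, scan converter
 * FIFO sizes and GB_ADDR_CONFIG) from the ASIC type; Polaris parts read
 * theirs from the vbios via amdgpu_atombios_get_gfx_info() instead of
 * hardcoding them, and the memory row size is derived from the fused
 * DIMM address mapping on APUs or from MC_ARB_RAMCFG on dGPUs.
 */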
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
		case 0xe6:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		switch (adev->pdev->revision) {
		case 0x80:
		case 0x81:
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
		case 0xd6:
		case 0xda:
		case 0xe9:
		case 0xea:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0x83:
		case 0xd0:
		case 0xd1:
		case 0xd2:
		case 0xd4:
		case 0xdb:
		case 0xe1:
		case 0xe2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
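
/*
 * Initialize one compute ring for the (mec, pipe, queue) slot it was
 * assigned: MEC0 is ME1 from the CP's point of view, the doorbell and
 * HPD EOP slot are indexed by ring_id, and the EOP interrupt source is
 * per pipe rather than per queue.
 */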
static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
			     + (ring_id * GFX8_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		   + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		   + ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}

static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

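	/*
	 * The walk below keeps pipe as the innermost loop variable, so when
	 * the ownership policy enables queues on more than one pipe,
	 * consecutive ring ids land on different pipes (1.0.0, 1.1.0, 1.2.0,
	 * 1.3.0, 1.0.1, ...) instead of filling one pipe before the next.
	 */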
	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
							       ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = gfx_v8_0_kiq_init(adev);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = gfx_v8_0_compute_mqd_sw_init(adev);
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
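
/* Tear down in roughly the reverse order of gfx_v8_0_sw_init(). */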
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	gfx_v8_0_compute_mqd_sw_fini(adev);
	gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	gfx_v8_0_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	gfx_v8_0_free_microcode(adev);

	return 0;
}
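
/*
 * Program the GB_TILE_MODE and GB_MACROTILE_MODE registers with the
 * per-ASIC surface tiling parameters, mirroring them into
 * adev->gfx.config so the tables can be reported to userspace; entries
 * skipped by the write loops below are left at their reset values.
 */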
static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_FIJI:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2cc0c0b5 2900 case CHIP_POLARIS11:
c4642a47 2901 case CHIP_POLARIS12:
68182d90
FC
2902 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2903 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2904 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2905 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2906 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2907 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2908 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2909 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2910 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2912 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2913 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2914 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2915 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2916 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2917 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2918 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2919 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2920 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2921 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2922 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2923 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2924 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2925 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2926 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2927 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2928 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2929 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2930 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2931 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2932 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2933 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2934 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2935 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2936 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2937 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2938 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2939 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2941 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2942 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2943 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2945 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2946 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2947 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2948 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2949 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2950 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2951 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2952 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2953 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2954 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2955 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2956 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2957 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2958 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2959 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2960 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2961 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2962 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2963 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2964 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2965 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2966 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2967 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2968 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2969 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2970 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2971 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2972 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2973 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2974 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2975 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2976 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2977 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2978 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2979 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2980 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2981 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2982 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2983 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2984 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2985 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2986 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2987 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2988 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2989 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2990 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2991 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2992 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2993 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2994 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2995 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2996 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2997 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2998 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2999 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3000 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3001 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3002 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3003 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3004 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3005 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3006 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3007 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3008 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3009 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3010 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3011 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3012 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3013 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3014 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3015 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3016 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3017 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3018 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3019 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3020 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3021 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3022 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3023 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3024
3025 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3027 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3028 NUM_BANKS(ADDR_SURF_16_BANK));
3029
3030 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3032 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3033 NUM_BANKS(ADDR_SURF_16_BANK));
3034
3035 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3037 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3038 NUM_BANKS(ADDR_SURF_16_BANK));
3039
3040 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3042 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3043 NUM_BANKS(ADDR_SURF_16_BANK));
3044
3045 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3047 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3048 NUM_BANKS(ADDR_SURF_16_BANK));
3049
3050 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3052 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3053 NUM_BANKS(ADDR_SURF_16_BANK));
3054
3055 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3057 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3058 NUM_BANKS(ADDR_SURF_16_BANK));
3059
3060 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3061 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3062 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3063 NUM_BANKS(ADDR_SURF_16_BANK));
3064
3065 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3066 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3067 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3068 NUM_BANKS(ADDR_SURF_16_BANK));
3069
3070 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3071 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3072 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3073 NUM_BANKS(ADDR_SURF_16_BANK));
3074
3075 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3077 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3078 NUM_BANKS(ADDR_SURF_16_BANK));
3079
3080 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3081 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3082 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3083 NUM_BANKS(ADDR_SURF_16_BANK));
3084
3085 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3086 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3087 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3088 NUM_BANKS(ADDR_SURF_8_BANK));
3089
3090 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3091 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3092 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3093 NUM_BANKS(ADDR_SURF_4_BANK));
3094
3095 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3096 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3097
3098 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3099 if (reg_offset != 7)
3100 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3101
3102 break;
2cc0c0b5 3103 case CHIP_POLARIS10:
68182d90
FC
3104 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3105 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3106 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3107 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3108 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3109 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3110 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3111 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3112 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3113 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3114 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3115 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3116 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3117 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3118 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3119 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3120 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3121 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3122 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3123 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3124 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3125 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3126 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3127 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3128 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3129 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3130 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3131 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3132 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3133 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3134 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3135 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3136 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3137 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3138 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3139 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3140 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3141 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3142 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3143 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3144 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3145 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3146 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3147 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3148 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3149 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3150 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3151 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3152 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3153 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3154 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3155 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3156 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3157 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3158 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3159 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3160 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3161 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3162 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3163 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3164 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3165 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3166 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3167 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3168 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3169 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3170 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3171 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3172 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3173 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3174 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3175 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3176 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3177 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3178 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3179 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3180 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3181 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3182 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3183 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3184 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3185 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3186 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3187 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3188 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3189 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3190 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3191 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3192 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3193 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3194 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3195 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3196 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3197 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3198 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3199 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3200 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3201 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3202 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3203 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3204 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3205 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3206 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3207 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3208 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3209 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3210 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3211 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3212 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3213 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3214 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3215 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3216 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3217 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3218 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3219 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3220 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3221 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3222 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3223 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3224 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3225 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3226
3227 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3228 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3229 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3230 NUM_BANKS(ADDR_SURF_16_BANK));
3231
3232 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3233 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3234 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235 NUM_BANKS(ADDR_SURF_16_BANK));
3236
3237 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3238 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3239 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3240 NUM_BANKS(ADDR_SURF_16_BANK));
3241
3242 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3245 NUM_BANKS(ADDR_SURF_16_BANK));
3246
3247 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3248 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3249 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3250 NUM_BANKS(ADDR_SURF_16_BANK));
3251
3252 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3253 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3254 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3255 NUM_BANKS(ADDR_SURF_16_BANK));
3256
3257 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3259 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3260 NUM_BANKS(ADDR_SURF_16_BANK));
3261
3262 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3263 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3264 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3265 NUM_BANKS(ADDR_SURF_16_BANK));
3266
3267 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3268 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3269 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3270 NUM_BANKS(ADDR_SURF_16_BANK));
3271
3272 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3275 NUM_BANKS(ADDR_SURF_16_BANK));
3276
3277 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3278 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3279 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3280 NUM_BANKS(ADDR_SURF_16_BANK));
3281
3282 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3283 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3284 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3285 NUM_BANKS(ADDR_SURF_8_BANK));
3286
3287 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3288 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3289 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3290 NUM_BANKS(ADDR_SURF_4_BANK));
3291
3292 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3293 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3294 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3295 NUM_BANKS(ADDR_SURF_4_BANK));
3296
3297 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3298 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3299
3300 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3301 if (reg_offset != 7)
3302 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3303
aaa36a97 3304 break;
e3c7656c 3305 case CHIP_STONEY:
90bea0ab
TSD
3306 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3307 PIPE_CONFIG(ADDR_SURF_P2) |
3308 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3309 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3310 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3311 PIPE_CONFIG(ADDR_SURF_P2) |
3312 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3313 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3314 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3315 PIPE_CONFIG(ADDR_SURF_P2) |
3316 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3317 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3318 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3319 PIPE_CONFIG(ADDR_SURF_P2) |
3320 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3321 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3322 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3323 PIPE_CONFIG(ADDR_SURF_P2) |
3324 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3325 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3326 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3327 PIPE_CONFIG(ADDR_SURF_P2) |
3328 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3329 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3330 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3331 PIPE_CONFIG(ADDR_SURF_P2) |
3332 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3333 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3334 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3335 PIPE_CONFIG(ADDR_SURF_P2));
3336 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3337 PIPE_CONFIG(ADDR_SURF_P2) |
3338 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3339 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3340 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3341 PIPE_CONFIG(ADDR_SURF_P2) |
3342 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3344 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3345 PIPE_CONFIG(ADDR_SURF_P2) |
3346 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3347 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3348 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3349 PIPE_CONFIG(ADDR_SURF_P2) |
3350 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3351 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3352 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3353 PIPE_CONFIG(ADDR_SURF_P2) |
3354 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3355 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3356 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3357 PIPE_CONFIG(ADDR_SURF_P2) |
3358 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3359 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3360 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3361 PIPE_CONFIG(ADDR_SURF_P2) |
3362 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3364 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3365 PIPE_CONFIG(ADDR_SURF_P2) |
3366 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3367 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3368 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3369 PIPE_CONFIG(ADDR_SURF_P2) |
3370 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3371 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3372 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3373 PIPE_CONFIG(ADDR_SURF_P2) |
3374 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3375 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3376 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3377 PIPE_CONFIG(ADDR_SURF_P2) |
3378 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3380 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3381 PIPE_CONFIG(ADDR_SURF_P2) |
3382 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3383 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3384 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3385 PIPE_CONFIG(ADDR_SURF_P2) |
3386 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3387 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3388 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3389 PIPE_CONFIG(ADDR_SURF_P2) |
3390 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3391 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3392 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3393 PIPE_CONFIG(ADDR_SURF_P2) |
3394 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3395 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3396 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3397 PIPE_CONFIG(ADDR_SURF_P2) |
3398 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3399 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3400 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3401 PIPE_CONFIG(ADDR_SURF_P2) |
3402 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3403 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3404 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3405 PIPE_CONFIG(ADDR_SURF_P2) |
3406 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3407 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3408
3409 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3410 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3411 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3412 NUM_BANKS(ADDR_SURF_8_BANK));
3413 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3414 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3415 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3416 NUM_BANKS(ADDR_SURF_8_BANK));
3417 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3418 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3419 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3420 NUM_BANKS(ADDR_SURF_8_BANK));
3421 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3422 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3423 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3424 NUM_BANKS(ADDR_SURF_8_BANK));
3425 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3426 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3427 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3428 NUM_BANKS(ADDR_SURF_8_BANK));
3429 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3430 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3431 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3432 NUM_BANKS(ADDR_SURF_8_BANK));
3433 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3434 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3435 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3436 NUM_BANKS(ADDR_SURF_8_BANK));
3437 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3438 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3439 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3440 NUM_BANKS(ADDR_SURF_16_BANK));
3441 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3442 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3443 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3444 NUM_BANKS(ADDR_SURF_16_BANK));
3445 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3446 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3447 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3448 NUM_BANKS(ADDR_SURF_16_BANK));
3449 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3450 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3451 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3452 NUM_BANKS(ADDR_SURF_16_BANK));
3453 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3454 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3455 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3456 NUM_BANKS(ADDR_SURF_16_BANK));
3457 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3460 NUM_BANKS(ADDR_SURF_16_BANK));
3461 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3462 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3463 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3464 NUM_BANKS(ADDR_SURF_8_BANK));
3465
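	/* tile mode indices 7, 12, 17 and 23 are intentionally not written
	 * by the loop below and keep their existing values
	 */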
3466 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3467 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3468 reg_offset != 23)
3469 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3470
3471 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3472 if (reg_offset != 7)
3473 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3474
e3c7656c 3475 break;
aaa36a97 3476 default:
90bea0ab
TSD
3477 dev_warn(adev->dev,
3478 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3479 adev->asic_type);
3480
3481 case CHIP_CARRIZO:
3482 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3483 PIPE_CONFIG(ADDR_SURF_P2) |
3484 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3485 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3486 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3487 PIPE_CONFIG(ADDR_SURF_P2) |
3488 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3489 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3490 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3491 PIPE_CONFIG(ADDR_SURF_P2) |
3492 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3493 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3494 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3495 PIPE_CONFIG(ADDR_SURF_P2) |
3496 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3497 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3498 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3499 PIPE_CONFIG(ADDR_SURF_P2) |
3500 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3501 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3502 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3503 PIPE_CONFIG(ADDR_SURF_P2) |
3504 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3505 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3506 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3507 PIPE_CONFIG(ADDR_SURF_P2) |
3508 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3509 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3510 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3511 PIPE_CONFIG(ADDR_SURF_P2));
3512 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3513 PIPE_CONFIG(ADDR_SURF_P2) |
3514 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3515 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3516 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3517 PIPE_CONFIG(ADDR_SURF_P2) |
3518 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3519 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3520 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3521 PIPE_CONFIG(ADDR_SURF_P2) |
3522 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3523 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3524 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3525 PIPE_CONFIG(ADDR_SURF_P2) |
3526 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3527 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3528 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3529 PIPE_CONFIG(ADDR_SURF_P2) |
3530 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3531 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3532 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3533 PIPE_CONFIG(ADDR_SURF_P2) |
3534 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3535 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3536 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3537 PIPE_CONFIG(ADDR_SURF_P2) |
3538 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3539 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3540 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3541 PIPE_CONFIG(ADDR_SURF_P2) |
3542 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3543 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3544 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3545 PIPE_CONFIG(ADDR_SURF_P2) |
3546 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3548 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3549 PIPE_CONFIG(ADDR_SURF_P2) |
3550 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3552 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3553 PIPE_CONFIG(ADDR_SURF_P2) |
3554 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3555 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3556 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3557 PIPE_CONFIG(ADDR_SURF_P2) |
3558 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3559 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3560 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3561 PIPE_CONFIG(ADDR_SURF_P2) |
3562 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3563 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3564 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3565 PIPE_CONFIG(ADDR_SURF_P2) |
3566 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3567 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3568 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3569 PIPE_CONFIG(ADDR_SURF_P2) |
3570 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3571 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3572 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3573 PIPE_CONFIG(ADDR_SURF_P2) |
3574 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3575 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3576 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3577 PIPE_CONFIG(ADDR_SURF_P2) |
3578 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3579 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3580 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3581 PIPE_CONFIG(ADDR_SURF_P2) |
3582 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3583 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3584
3585 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3586 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3587 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3588 NUM_BANKS(ADDR_SURF_8_BANK));
3589 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3590 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3591 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3592 NUM_BANKS(ADDR_SURF_8_BANK));
3593 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3594 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3595 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3596 NUM_BANKS(ADDR_SURF_8_BANK));
3597 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3598 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3599 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3600 NUM_BANKS(ADDR_SURF_8_BANK));
3601 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3602 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3603 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3604 NUM_BANKS(ADDR_SURF_8_BANK));
3605 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3606 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3607 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3608 NUM_BANKS(ADDR_SURF_8_BANK));
3609 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3610 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3611 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3612 NUM_BANKS(ADDR_SURF_8_BANK));
3613 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3614 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3615 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3616 NUM_BANKS(ADDR_SURF_16_BANK));
3617 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3618 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3619 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3620 NUM_BANKS(ADDR_SURF_16_BANK));
3621 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3622 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3623 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3624 NUM_BANKS(ADDR_SURF_16_BANK));
3625 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3626 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3627 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3628 NUM_BANKS(ADDR_SURF_16_BANK));
3629 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3630 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3631 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3632 NUM_BANKS(ADDR_SURF_16_BANK));
3633 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3634 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3635 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3636 NUM_BANKS(ADDR_SURF_16_BANK));
3637 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3638 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3639 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3640 NUM_BANKS(ADDR_SURF_8_BANK));
3641
3642 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3643 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3644 reg_offset != 23)
3645 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3646
3647 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3648 if (reg_offset != 7)
3649 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3650
3651 break;
aaa36a97
AD
3652 }
3653}
3654
05fb7291 3655static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
9559ef5b 3656 u32 se_num, u32 sh_num, u32 instance)
aaa36a97 3657{
9559ef5b
TSD
3658 u32 data;
3659
3660 if (instance == 0xffffffff)
3661 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3662 else
3663 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
aaa36a97 3664
5003f278 3665 if (se_num == 0xffffffff)
aaa36a97 3666 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
5003f278 3667 else
aaa36a97 3668 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
5003f278
TSD
3669
3670 if (sh_num == 0xffffffff)
3671 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3672 else
aaa36a97 3673 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
5003f278 3674
aaa36a97
AD
3675 WREG32(mmGRBM_GFX_INDEX, data);
3676}
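/*
 * Usage sketch: 0xffffffff in any argument selects broadcast mode for that
 * field, so
 *
 *	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 *
 * restores GRBM_GFX_INDEX to broadcast-to-all, as done after every
 * per-SE/per-SH loop in this file.
 */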
3677
8f8e00c1
AD
3678static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3679{
3680 return (u32)((1ULL << bit_width) - 1);
3681}
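/*
 * Worked example: gfx_v8_0_create_bitmask(4) returns 0xF. The 1ULL shift
 * keeps the intermediate value 64 bits wide, so bit_width == 32 yields
 * 0xffffffff instead of the undefined behaviour of a 32-bit shift by 32.
 */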
3682
3683static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
aaa36a97
AD
3684{
3685 u32 data, mask;
3686
5003f278
TSD
3687 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3688 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
aaa36a97 3689
5003f278 3690 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
aaa36a97 3691
8f8e00c1
AD
3692 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3693 adev->gfx.config.max_sh_per_se);
aaa36a97 3694
8f8e00c1 3695 return (~data) & mask;
aaa36a97
AD
3696}
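/*
 * Worked example (numbers assumed for illustration): with
 * max_backends_per_se = 4 and max_sh_per_se = 1, mask = 0xF; if the
 * combined BACKEND_DISABLE field reads 0x2, the function returns
 * (~0x2) & 0xF = 0xD, i.e. render backends 0, 2 and 3 are active.
 */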
3697
167ac573
HR
3698static void
3699gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3700{
3701 switch (adev->asic_type) {
3702 case CHIP_FIJI:
3703 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3704 RB_XSEL2(1) | PKR_MAP(2) |
3705 PKR_XSEL(1) | PKR_YSEL(1) |
3706 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3707 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3708 SE_PAIR_YSEL(2);
3709 break;
3710 case CHIP_TONGA:
3711 case CHIP_POLARIS10:
3712 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3713 SE_XSEL(1) | SE_YSEL(1);
3714 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3715 SE_PAIR_YSEL(2);
3716 break;
3717 case CHIP_TOPAZ:
3718 case CHIP_CARRIZO:
3719 *rconf |= RB_MAP_PKR0(2);
3720 *rconf1 |= 0x0;
3721 break;
3722 case CHIP_POLARIS11:
c4642a47 3723 case CHIP_POLARIS12:
167ac573
HR
3724 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3725 SE_XSEL(1) | SE_YSEL(1);
3726 *rconf1 |= 0x0;
3727 break;
3728 case CHIP_STONEY:
3729 *rconf |= 0x0;
3730 *rconf1 |= 0x0;
3731 break;
3732 default:
3733 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3734 break;
3735 }
3736}
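/*
 * Note: the values above describe the RB/PKR/SE routing for a fully
 * enabled chip; when some RBs are harvested, gfx_v8_0_setup_rb() instead
 * derives per-SE configs via gfx_v8_0_write_harvested_raster_configs().
 */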
3737
3738static void
3739gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3740 u32 raster_config, u32 raster_config_1,
3741 unsigned rb_mask, unsigned num_rb)
3742{
3743 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3744 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3745 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3746 unsigned rb_per_se = num_rb / num_se;
3747 unsigned se_mask[4];
3748 unsigned se;
3749
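	/* carve the global RB mask into per-SE slices: se_mask[n] holds the
	 * RB enable bits belonging to shader engine n
	 */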
3750 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3751 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3752 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3753 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3754
3755 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3756 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3757 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3758
3759 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3760 (!se_mask[2] && !se_mask[3]))) {
3761 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3762
3763 if (!se_mask[0] && !se_mask[1]) {
3764 raster_config_1 |=
3765 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3766 } else {
3767 raster_config_1 |=
3768 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3769 }
3770 }
3771
3772 for (se = 0; se < num_se; se++) {
3773 unsigned raster_config_se = raster_config;
3774 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3775 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3776 int idx = (se / 2) * 2;
3777
3778 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3779 raster_config_se &= ~SE_MAP_MASK;
3780
3781 if (!se_mask[idx]) {
3782 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3783 } else {
3784 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3785 }
3786 }
3787
3788 pkr0_mask &= rb_mask;
3789 pkr1_mask &= rb_mask;
3790 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3791 raster_config_se &= ~PKR_MAP_MASK;
3792
3793 if (!pkr0_mask) {
3794 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3795 } else {
3796 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3797 }
3798 }
3799
3800 if (rb_per_se >= 2) {
3801 unsigned rb0_mask = 1 << (se * rb_per_se);
3802 unsigned rb1_mask = rb0_mask << 1;
3803
3804 rb0_mask &= rb_mask;
3805 rb1_mask &= rb_mask;
3806 if (!rb0_mask || !rb1_mask) {
3807 raster_config_se &= ~RB_MAP_PKR0_MASK;
3808
3809 if (!rb0_mask) {
3810 raster_config_se |=
3811 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3812 } else {
3813 raster_config_se |=
3814 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3815 }
3816 }
3817
3818 if (rb_per_se > 2) {
3819 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3820 rb1_mask = rb0_mask << 1;
3821 rb0_mask &= rb_mask;
3822 rb1_mask &= rb_mask;
3823 if (!rb0_mask || !rb1_mask) {
3824 raster_config_se &= ~RB_MAP_PKR1_MASK;
3825
3826 if (!rb0_mask) {
3827 raster_config_se |=
3828 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3829 } else {
3830 raster_config_se |=
3831 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3832 }
3833 }
3834 }
3835 }
3836
3837 /* GRBM_GFX_INDEX has a different offset on VI */
3838 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3839 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3840 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3841 }
3842
3843 /* GRBM_GFX_INDEX has a different offset on VI */
3844 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3845}
3846
8f8e00c1 3847static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
aaa36a97
AD
3848{
3849 int i, j;
aac1e3ca 3850 u32 data;
167ac573 3851 u32 raster_config = 0, raster_config_1 = 0;
8f8e00c1 3852 u32 active_rbs = 0;
6157bd7a
FC
3853 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3854 adev->gfx.config.max_sh_per_se;
167ac573 3855 unsigned num_rb_pipes;
aaa36a97
AD
3856
3857 mutex_lock(&adev->grbm_idx_mutex);
8f8e00c1
AD
3858 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3859 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
9559ef5b 3860 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
8f8e00c1
AD
3861 data = gfx_v8_0_get_rb_active_bitmap(adev);
3862 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
6157bd7a 3863 rb_bitmap_width_per_sh);
aaa36a97
AD
3864 }
3865 }
9559ef5b 3866 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
aaa36a97 3867
8f8e00c1 3868 adev->gfx.config.backend_enable_mask = active_rbs;
aac1e3ca 3869 adev->gfx.config.num_rbs = hweight32(active_rbs);
167ac573
HR
3870
3871 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3872 adev->gfx.config.max_shader_engines, 16);
3873
3874 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3875
3876 if (!adev->gfx.config.backend_enable_mask ||
3877 adev->gfx.config.num_rbs >= num_rb_pipes) {
3878 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3879 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3880 } else {
3881 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3882 adev->gfx.config.backend_enable_mask,
3883 num_rb_pipes);
3884 }
3885
392f0c77
AD
3886 /* cache the values for userspace */
3887 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3888 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3889 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3890 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3891 RREG32(mmCC_RB_BACKEND_DISABLE);
3892 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3893 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3894 adev->gfx.config.rb_config[i][j].raster_config =
3895 RREG32(mmPA_SC_RASTER_CONFIG);
3896 adev->gfx.config.rb_config[i][j].raster_config_1 =
3897 RREG32(mmPA_SC_RASTER_CONFIG_1);
3898 }
3899 }
3900 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
167ac573 3901 mutex_unlock(&adev->grbm_idx_mutex);
aaa36a97
AD
3902}
3903
cd06bf68 3904/**
35c7a952 3905 * gfx_v8_0_init_compute_vmid - init compute vmids
cd06bf68 3906 *
dc102c43 3907 * @adev: amdgpu_device pointer
cd06bf68
BG
3908 *
3909 * Initialize compute vmid sh_mem registers
3910 *
3911 */
3912#define DEFAULT_SH_MEM_BASES (0x6000)
3913#define FIRST_COMPUTE_VMID (8)
3914#define LAST_COMPUTE_VMID (16)
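/* compute queues use VMIDs FIRST_COMPUTE_VMID..LAST_COMPUTE_VMID-1, i.e. 8..15 */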
35c7a952 3915static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
cd06bf68
BG
3916{
3917 int i;
3918 uint32_t sh_mem_config;
3919 uint32_t sh_mem_bases;
3920
3921 /*
3922 * Configure apertures:
3923 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3924 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3925 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3926 */
3927 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3928
3929 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3930 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3931 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3932 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3933 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3934 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3935
3936 mutex_lock(&adev->srbm_mutex);
3937 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3938 vi_srbm_select(adev, 0, 0, 0, i);
3939 /* CP and shaders */
3940 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3941 WREG32(mmSH_MEM_APE1_BASE, 1);
3942 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3943 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3944 }
3945 vi_srbm_select(adev, 0, 0, 0, 0);
3946 mutex_unlock(&adev->srbm_mutex);
3947}
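/*
 * Worked example: DEFAULT_SH_MEM_BASES = 0x6000 is packed into both the
 * shared and private halves, giving sh_mem_bases = 0x60006000. Each 16-bit
 * base field holds bits 63:48 of its aperture (compare the
 * shared_aperture_start >> 48 write in gfx_v8_0_gpu_init()), so 0x6000
 * selects 0x6000'0000'00000000, matching the aperture comment above.
 */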
3948
df6e2c4a
JZ
3949static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3950{
3951 switch (adev->asic_type) {
3952 default:
3953 adev->gfx.config.double_offchip_lds_buf = 1;
3954 break;
3955 case CHIP_CARRIZO:
3956 case CHIP_STONEY:
3957 adev->gfx.config.double_offchip_lds_buf = 0;
3958 break;
3959 }
3960}
3961
aaa36a97
AD
3962static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3963{
8fe73328 3964 u32 tmp, sh_static_mem_cfg;
aaa36a97
AD
3965 int i;
3966
61cb8cef 3967 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
0bde3a95
AD
3968 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3969 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3970 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
aaa36a97
AD
3971
3972 gfx_v8_0_tiling_mode_table_init(adev);
8f8e00c1 3973 gfx_v8_0_setup_rb(adev);
7dae69a2 3974 gfx_v8_0_get_cu_info(adev);
df6e2c4a 3975 gfx_v8_0_config_init(adev);
aaa36a97
AD
3976
3977 /* XXX SH_MEM regs */
3978 /* where to put LDS, scratch, GPUVM in FSA64 space */
8fe73328
JZ
3979 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3980 SWIZZLE_ENABLE, 1);
3981 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3982 ELEMENT_SIZE, 1);
3983 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3984 INDEX_STRIDE, 3);
aaa36a97 3985 mutex_lock(&adev->srbm_mutex);
7645670d 3986 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
aaa36a97
AD
3987 vi_srbm_select(adev, 0, 0, 0, i);
3988 /* CP and shaders */
3989 if (i == 0) {
3990 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3991 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
0bde3a95 3992 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
74a5d165 3993 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
aaa36a97 3994 WREG32(mmSH_MEM_CONFIG, tmp);
8fe73328 3995 WREG32(mmSH_MEM_BASES, 0);
aaa36a97
AD
3996 } else {
3997 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
8fe73328 3998 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
0bde3a95 3999 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
74a5d165 4000 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
aaa36a97 4001 WREG32(mmSH_MEM_CONFIG, tmp);
8fe73328
JZ
4002 tmp = adev->mc.shared_aperture_start >> 48;
4003 WREG32(mmSH_MEM_BASES, tmp);
aaa36a97
AD
4004 }
4005
4006 WREG32(mmSH_MEM_APE1_BASE, 1);
4007 WREG32(mmSH_MEM_APE1_LIMIT, 0);
8fe73328 4008 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
aaa36a97
AD
4009 }
4010 vi_srbm_select(adev, 0, 0, 0, 0);
4011 mutex_unlock(&adev->srbm_mutex);
4012
35c7a952 4013 gfx_v8_0_init_compute_vmid(adev);
cd06bf68 4014
aaa36a97
AD
4015 mutex_lock(&adev->grbm_idx_mutex);
4016 /*
4017 * make sure that the following register writes are broadcast
4018 * to all the shaders
4019 */
9559ef5b 4020 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
aaa36a97
AD
4021
4022 WREG32(mmPA_SC_FIFO_SIZE,
4023 (adev->gfx.config.sc_prim_fifo_size_frontend <<
4024 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
4025 (adev->gfx.config.sc_prim_fifo_size_backend <<
4026 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
4027 (adev->gfx.config.sc_hiz_tile_fifo_size <<
4028 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
4029 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
4030 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
d2383267 4031
4032 tmp = RREG32(mmSPI_ARB_PRIORITY);
4033 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
4034 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
4035 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
4036 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
4037 WREG32(mmSPI_ARB_PRIORITY, tmp);
4038
aaa36a97
AD
4039 mutex_unlock(&adev->grbm_idx_mutex);
4040
4041}
4042
4043static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
4044{
4045 u32 i, j, k;
4046 u32 mask;
4047
4048 mutex_lock(&adev->grbm_idx_mutex);
4049 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
4050 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
9559ef5b 4051 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
aaa36a97
AD
4052 for (k = 0; k < adev->usec_timeout; k++) {
4053 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
4054 break;
4055 udelay(1);
4056 }
4057 }
4058 }
9559ef5b 4059 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
aaa36a97
AD
4060 mutex_unlock(&adev->grbm_idx_mutex);
4061
4062 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
4063 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
4064 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
4065 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
4066 for (k = 0; k < adev->usec_timeout; k++) {
4067 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4068 break;
4069 udelay(1);
4070 }
4071}
4072
4073static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
4074 bool enable)
4075{
4076 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
4077
0d07db7e
TSD
4078 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
4079 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
4080 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
4081 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
4082
aaa36a97
AD
4083 WREG32(mmCP_INT_CNTL_RING0, tmp);
4084}
4085
2b6cd977
EH
4086static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
4087{
4088 /* csib */
4089 WREG32(mmRLC_CSIB_ADDR_HI,
4090 adev->gfx.rlc.clear_state_gpu_addr >> 32);
4091 WREG32(mmRLC_CSIB_ADDR_LO,
4092 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
4093 WREG32(mmRLC_CSIB_LENGTH,
4094 adev->gfx.rlc.clear_state_size);
4095}
4096
4097static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
4098 int ind_offset,
4099 int list_size,
4100 int *unique_indices,
4101 int *indices_count,
4102 int max_indices,
4103 int *ind_start_offsets,
4104 int *offset_count,
4105 int max_offset)
4106{
4107 int indices;
4108 bool new_entry = true;
4109
4110 for (; ind_offset < list_size; ind_offset++) {
4111
4112 if (new_entry) {
4113 new_entry = false;
4114 ind_start_offsets[*offset_count] = ind_offset;
4115 *offset_count = *offset_count + 1;
4116 BUG_ON(*offset_count >= max_offset);
4117 }
4118
4119 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
4120 new_entry = true;
4121 continue;
4122 }
4123
4124 ind_offset += 2;
4125
4126 /* look for a matching index */
4127 for (indices = 0;
4128 indices < *indices_count;
4129 indices++) {
4130 if (unique_indices[indices] ==
4131 register_list_format[ind_offset])
4132 break;
4133 }
4134
4135 if (indices >= *indices_count) {
4136 unique_indices[*indices_count] =
4137 register_list_format[ind_offset];
4138 indices = *indices_count;
4139 *indices_count = *indices_count + 1;
4140 BUG_ON(*indices_count >= max_indices);
4141 }
4142
4143 register_list_format[ind_offset] = indices;
4144 }
4145}
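/*
 * Format sketch, inferred from the walk above: register_list_format is a
 * series of entries, each a run of three-word records terminated by an
 * 0xFFFFFFFF marker. The third word of every record is looked up in (and,
 * if new, appended to) unique_indices[], then rewritten in place as the
 * compact index; ind_start_offsets[] records where each entry began.
 */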
4146
4147static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4148{
4149 int i, temp, data;
4150 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4151 int indices_count = 0;
4152 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4153 int offset_count = 0;
4154
4155 int list_size;
4156 unsigned int *register_list_format =
4157 kmemdup(adev->gfx.rlc.register_list_format,
4158 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4159 if (!register_list_format)
4160 return -ENOMEM;
4162
4163 gfx_v8_0_parse_ind_reg_list(register_list_format,
4164 RLC_FormatDirectRegListLength,
4165 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4166 unique_indices,
4167 &indices_count,
4168 ARRAY_SIZE(unique_indices),
4169 indirect_start_offsets,
4170 &offset_count,
4171 ARRAY_SIZE(indirect_start_offsets));
4172
4173 /* save and restore list */
61cb8cef 4174 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
2b6cd977
EH
4175
4176 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4177 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4178 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4179
4180 /* indirect list */
4181 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4182 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4183 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4184
4185 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4186 list_size = list_size >> 1;
4187 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4188 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4189
4190 /* write the starting offsets */
4191 WREG32(mmRLC_GPM_SCRATCH_ADDR,
4192 adev->gfx.rlc.starting_offsets_start);
4193 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4194 WREG32(mmRLC_GPM_SCRATCH_DATA,
4195 indirect_start_offsets[i]);
4196
4197 /* unique indices */
4198 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4199 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4200 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
202e0b22 4201 if (unique_indices[i] != 0) {
b85c9d2a
ML
4202 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4203 WREG32(data + i, unique_indices[i] >> 20);
202e0b22 4204 }
2b6cd977
EH
4205 }
4206 kfree(register_list_format);
4207
4208 return 0;
4209}
4210
4211static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4212{
61cb8cef 4213 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
2b6cd977
EH
4214}
4215
fb16007b 4216static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
f4bfffdd
EH
4217{
4218 uint32_t data;
4219
c4d17b81
RZ
4220 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4221
4222 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4223 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4224 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4225 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4226 WREG32(mmRLC_PG_DELAY, data);
4227
4228 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4229 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4230
4231}
4232
4233static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4234 bool enable)
4235{
4236 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4237}
4238
4239static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4240 bool enable)
4241{
4242 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4243}
4244
4245static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4246{
4247 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4248}
4249
4250static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4251{
4252 if ((adev->asic_type == CHIP_CARRIZO) ||
4253 (adev->asic_type == CHIP_STONEY)) {
4254 gfx_v8_0_init_csb(adev);
4255 gfx_v8_0_init_save_restore_list(adev);
4256 gfx_v8_0_enable_save_restore_machine(adev);
4257 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4258 gfx_v8_0_init_power_gating(adev);
4259 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4260 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4261 (adev->asic_type == CHIP_POLARIS12)) {
4262 gfx_v8_0_init_csb(adev);
4263 gfx_v8_0_init_save_restore_list(adev);
4264 gfx_v8_0_enable_save_restore_machine(adev);
4265 gfx_v8_0_init_power_gating(adev);
4266 }
4267
4268}
4269
4270 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4271 {
4272 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4273
4274 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4275 gfx_v8_0_wait_for_rlc_serdes(adev);
4276}
4277
4278static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4279{
4280 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4281 udelay(50);
4282
4283 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4284 udelay(50);
4285}
4286
4287static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4288{
4289 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4290
4291 /* carrizo and other APUs enable the cp interrupt only after the cp is initialized */
4292 if (!(adev->flags & AMD_IS_APU))
4293 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4294
4295 udelay(50);
4296}
4297
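/*
 * The load below follows the common CP/RLC microcode pattern: reset the
 * ucode address register to 0, stream all words through the data
 * register, then write the firmware version to the address register as
 * the final step.
 */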
4298static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4299{
4300 const struct rlc_firmware_header_v2_0 *hdr;
4301 const __le32 *fw_data;
4302 unsigned i, fw_size;
4303
4304 if (!adev->gfx.rlc_fw)
4305 return -EINVAL;
4306
4307 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4308 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4309
4310 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4311 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4312 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4313
4314 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4315 for (i = 0; i < fw_size; i++)
4316 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4317 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4318
4319 return 0;
4320}
4321
4322static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4323{
4324 int r;
4325 u32 tmp;
4326
4327 gfx_v8_0_rlc_stop(adev);
4328
4329 /* disable CG */
4330 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4331 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4332 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4333 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4334 if (adev->asic_type == CHIP_POLARIS11 ||
4335 adev->asic_type == CHIP_POLARIS10 ||
4336 adev->asic_type == CHIP_POLARIS12) {
4337 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4338 tmp &= ~0x3;
4339 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4340 }
4341
4342 /* disable PG */
4343 WREG32(mmRLC_PG_CNTL, 0);
4344
4345 gfx_v8_0_rlc_reset(adev);
4346 gfx_v8_0_init_pg(adev);
4347
4348 if (!adev->pp_enabled) {
4349 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4350 /* legacy rlc firmware loading */
4351 r = gfx_v8_0_rlc_load_microcode(adev);
4352 if (r)
4353 return r;
4354 } else {
4355 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4356 AMDGPU_UCODE_ID_RLC_G);
4357 if (r)
4358 return -EINVAL;
4359 }
4360 }
4361
4362 gfx_v8_0_rlc_start(adev);
4363
4364 return 0;
4365}
4366
4367static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4368{
4369 int i;
4370 u32 tmp = RREG32(mmCP_ME_CNTL);
4371
4372 if (enable) {
4373 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4374 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4375 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4376 } else {
4377 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4378 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4379 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4380 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4381 adev->gfx.gfx_ring[i].ready = false;
4382 }
4383 WREG32(mmCP_ME_CNTL, tmp);
4384 udelay(50);
4385}
4386
4387static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4388{
4389 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4390 const struct gfx_firmware_header_v1_0 *ce_hdr;
4391 const struct gfx_firmware_header_v1_0 *me_hdr;
4392 const __le32 *fw_data;
4393 unsigned i, fw_size;
4394
4395 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4396 return -EINVAL;
4397
4398 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4399 adev->gfx.pfp_fw->data;
4400 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4401 adev->gfx.ce_fw->data;
4402 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4403 adev->gfx.me_fw->data;
4404
4405 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4406 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4407 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4408
4409 gfx_v8_0_cp_gfx_enable(adev, false);
4410
4411 /* PFP */
4412 fw_data = (const __le32 *)
4413 (adev->gfx.pfp_fw->data +
4414 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4415 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4416 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4417 for (i = 0; i < fw_size; i++)
4418 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4419 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4420
4421 /* CE */
4422 fw_data = (const __le32 *)
4423 (adev->gfx.ce_fw->data +
4424 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4425 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4426 WREG32(mmCP_CE_UCODE_ADDR, 0);
4427 for (i = 0; i < fw_size; i++)
4428 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4429 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4430
4431 /* ME */
4432 fw_data = (const __le32 *)
4433 (adev->gfx.me_fw->data +
4434 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4435 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4436 WREG32(mmCP_ME_RAM_WADDR, 0);
4437 for (i = 0; i < fw_size; i++)
4438 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4439 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4440
4441 return 0;
4442}
4443
4444static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4445{
4446 u32 count = 0;
4447 const struct cs_section_def *sect = NULL;
4448 const struct cs_extent_def *ext = NULL;
4449
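	/*
	 * Dword accounting for the buffer emitted by gfx_v8_0_cp_gfx_start():
	 * 2 (begin clear state) + 3 (context control) + (2 + reg_count) per
	 * SECT_CONTEXT extent + 4 (raster config) + 2 (end clear state)
	 * + 2 (clear state), as tallied below.
	 */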
4450 /* begin clear state */
4451 count += 2;
4452 /* context control state */
4453 count += 3;
4454
4455 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4456 for (ext = sect->section; ext->extent != NULL; ++ext) {
4457 if (sect->id == SECT_CONTEXT)
4458 count += 2 + ext->reg_count;
4459 else
4460 return 0;
4461 }
4462 }
4463 /* pa_sc_raster_config/pa_sc_raster_config1 */
4464 count += 4;
4465 /* end clear state */
4466 count += 2;
4467 /* clear state */
4468 count += 2;
4469
4470 return count;
4471}
4472
4473static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4474{
4475 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4476 const struct cs_section_def *sect = NULL;
4477 const struct cs_extent_def *ext = NULL;
4478 int r, i;
4479
4480 /* init the CP */
4481 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4482 WREG32(mmCP_ENDIAN_SWAP, 0);
4483 WREG32(mmCP_DEVICE_ID, 1);
4484
4485 gfx_v8_0_cp_gfx_enable(adev, true);
4486
4487 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4488 if (r) {
4489 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4490 return r;
4491 }
4492
4493 /* clear state buffer */
4494 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4495 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4496
4497 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4498 amdgpu_ring_write(ring, 0x80000000);
4499 amdgpu_ring_write(ring, 0x80000000);
4500
4501 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4502 for (ext = sect->section; ext->extent != NULL; ++ext) {
4503 if (sect->id == SECT_CONTEXT) {
4504 amdgpu_ring_write(ring,
4505 PACKET3(PACKET3_SET_CONTEXT_REG,
4506 ext->reg_count));
4507 amdgpu_ring_write(ring,
4508 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4509 for (i = 0; i < ext->reg_count; i++)
4510 amdgpu_ring_write(ring, ext->extent[i]);
4511 }
4512 }
4513 }
4514
4515 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4516 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4517 switch (adev->asic_type) {
4518 case CHIP_TONGA:
4519 case CHIP_POLARIS10:
4520 amdgpu_ring_write(ring, 0x16000012);
4521 amdgpu_ring_write(ring, 0x0000002A);
4522 break;
4523 case CHIP_POLARIS11:
4524 case CHIP_POLARIS12:
4525 amdgpu_ring_write(ring, 0x16000012);
4526 amdgpu_ring_write(ring, 0x00000000);
4527 break;
4528 case CHIP_FIJI:
4529 amdgpu_ring_write(ring, 0x3a00161a);
4530 amdgpu_ring_write(ring, 0x0000002e);
4531 break;
4532 case CHIP_CARRIZO:
4533 amdgpu_ring_write(ring, 0x00000002);
4534 amdgpu_ring_write(ring, 0x00000000);
4535 break;
4536 case CHIP_TOPAZ:
4537 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4538 0x00000000 : 0x00000002);
4539 amdgpu_ring_write(ring, 0x00000000);
4540 break;
4541 case CHIP_STONEY:
4542 amdgpu_ring_write(ring, 0x00000000);
4543 amdgpu_ring_write(ring, 0x00000000);
4544 break;
4545 default:
4546 BUG();
4547 }
4548
4549 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4550 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4551
4552 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4553 amdgpu_ring_write(ring, 0);
4554
4555 /* init the CE partitions */
4556 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4557 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4558 amdgpu_ring_write(ring, 0x8000);
4559 amdgpu_ring_write(ring, 0x8000);
4560
4561 amdgpu_ring_commit(ring);
4562
4563 return 0;
4564}
4565static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4566{
4567 u32 tmp;
4568 /* no gfx doorbells on iceland */
4569 if (adev->asic_type == CHIP_TOPAZ)
4570 return;
4571
4572 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4573
4574 if (ring->use_doorbell) {
4575 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4576 DOORBELL_OFFSET, ring->doorbell_index);
4577 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4578 DOORBELL_HIT, 0);
4579 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4580 DOORBELL_EN, 1);
4581 } else {
4582 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4583 }
4584
4585 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4586
4587 if (adev->flags & AMD_IS_APU)
4588 return;
4589
4590 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4591 DOORBELL_RANGE_LOWER,
4592 AMDGPU_DOORBELL_GFX_RING0);
4593 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4594
4595 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4596 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4597}
4598
4599static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4600{
4601 struct amdgpu_ring *ring;
4602 u32 tmp;
4603 u32 rb_bufsz;
4604 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4605 int r;
4606
4607 /* Set the write pointer delay */
4608 WREG32(mmCP_RB_WPTR_DELAY, 0);
4609
4610 /* set the RB to use vmid 0 */
4611 WREG32(mmCP_RB_VMID, 0);
4612
4613 /* Set ring buffer size */
4614 ring = &adev->gfx.gfx_ring[0];
4615 rb_bufsz = order_base_2(ring->ring_size / 8);
4616 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4617 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4618 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4619 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4620#ifdef __BIG_ENDIAN
4621 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4622#endif
4623 WREG32(mmCP_RB0_CNTL, tmp);
4624
4625 /* Initialize the ring buffer's read and write pointers */
4626 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4627 ring->wptr = 0;
4628 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4629
4630 /* set the wb address whether it's enabled or not */
4631 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4632 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4633 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4634
4635 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4636 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4637 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4638 mdelay(1);
4639 WREG32(mmCP_RB0_CNTL, tmp);
4640
4641 rb_addr = ring->gpu_addr >> 8;
4642 WREG32(mmCP_RB0_BASE, rb_addr);
4643 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4644
4645 gfx_v8_0_set_cpg_door_bell(adev, ring);
4646 /* start the ring */
4647 amdgpu_ring_clear_ring(ring);
4648 gfx_v8_0_cp_gfx_start(adev);
4649 ring->ready = true;
4650 r = amdgpu_ring_test_ring(ring);
4651 if (r)
4652 ring->ready = false;
4653
4654 return r;
4655}
4656
4657static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4658{
4659 int i;
4660
4661 if (enable) {
4662 WREG32(mmCP_MEC_CNTL, 0);
4663 } else {
4664 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4665 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4666 adev->gfx.compute_ring[i].ready = false;
4667 adev->gfx.kiq.ring.ready = false;
4668 }
4669 udelay(50);
4670}
4671
4672static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4673{
4674 const struct gfx_firmware_header_v1_0 *mec_hdr;
4675 const __le32 *fw_data;
4676 unsigned i, fw_size;
4677
4678 if (!adev->gfx.mec_fw)
4679 return -EINVAL;
4680
4681 gfx_v8_0_cp_compute_enable(adev, false);
4682
4683 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4684 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4685
4686 fw_data = (const __le32 *)
4687 (adev->gfx.mec_fw->data +
4688 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4689 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4690
4691 /* MEC1 */
4692 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4693 for (i = 0; i < fw_size; i++)
4694 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4695 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4696
4697 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4698 if (adev->gfx.mec2_fw) {
4699 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4700
4701 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4702 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4703
4704 fw_data = (const __le32 *)
4705 (adev->gfx.mec2_fw->data +
4706 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4707 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4708
4709 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4710 for (i = 0; i < fw_size; i++)
4711 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4712 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4713 }
4714
4715 return 0;
4716}
4717
4718/* KIQ functions */
4719static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4720{
4721 uint32_t tmp;
4722 struct amdgpu_device *adev = ring->adev;
4723
4724 /* tell RLC which is KIQ queue */
4725 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4726 tmp &= 0xffffff00;
4727 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4728 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4729 tmp |= 0x80;
4730 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4731}
4732
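/*
 * gfx_v8_0_kiq_kcq_enable() maps all user compute queues in a single KIQ
 * submission: one SET_RESOURCES packet declaring the queue mask, one
 * MAP_QUEUES packet per compute ring (doorbell offset, MQD address, wptr
 * address), and a final scratch-register write that is polled to confirm
 * the KIQ has processed the whole batch.
 */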
4733 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4734 {
4735 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4736 uint32_t scratch, tmp = 0;
4737 int r, i;
4738
4739 r = amdgpu_gfx_scratch_get(adev, &scratch);
4740 if (r) {
4741 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4742 return r;
4743 }
4744 WREG32(scratch, 0xCAFEDEAD);
4745
4746 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4747 if (r) {
4748 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4749 amdgpu_gfx_scratch_free(adev, scratch);
4750 return r;
4751 }
4752 /* set resources */
4753 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4754 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4755 amdgpu_ring_write(kiq_ring, 0x000000FF); /* queue mask lo */
4756 amdgpu_ring_write(kiq_ring, 0); /* queue mask hi */
4757 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4758 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4759 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4760 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4761 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4762 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4763 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4764 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4765
4766 /* map queues */
4767 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4768 /* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
4769 amdgpu_ring_write(kiq_ring,
4770 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4771 amdgpu_ring_write(kiq_ring,
4772 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4773 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4774 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4775 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4776 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4777 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4778 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4779 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4780 }
4781 /* write to scratch for completion */
4782 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4783 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4784 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4785 amdgpu_ring_commit(kiq_ring);
4786
4787 for (i = 0; i < adev->usec_timeout; i++) {
4788 tmp = RREG32(scratch);
4789 if (tmp == 0xDEADBEEF)
4790 break;
4791 DRM_UDELAY(1);
4792 }
4793 if (i >= adev->usec_timeout) {
4794 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4795 scratch, tmp);
4796 r = -EINVAL;
4797 }
4798 amdgpu_gfx_scratch_free(adev, scratch);
4799
4800 return r;
4801}
4802
4803static int gfx_v8_0_kiq_kcq_disable(struct amdgpu_device *adev)
4804{
4805 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4806 uint32_t scratch, tmp = 0;
4807 int r, i;
4808
4809 r = amdgpu_gfx_scratch_get(adev, &scratch);
4810 if (r) {
4811 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4812 return r;
4813 }
4814 WREG32(scratch, 0xCAFEDEAD);
4815
4816 r = amdgpu_ring_alloc(kiq_ring, 6 + 3);
4817 if (r) {
4818 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4819 amdgpu_gfx_scratch_free(adev, scratch);
4820 return r;
4821 }
4822 /* unmap queues */
4823 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4824 amdgpu_ring_write(kiq_ring,
4825 PACKET3_UNMAP_QUEUES_ACTION(1)| /* RESET_QUEUES */
4826 PACKET3_UNMAP_QUEUES_QUEUE_SEL(2)); /* select all queues */
4827 amdgpu_ring_write(kiq_ring, 0);
4828 amdgpu_ring_write(kiq_ring, 0);
4829 amdgpu_ring_write(kiq_ring, 0);
4830 amdgpu_ring_write(kiq_ring, 0);
4831 /* write to scratch for completion */
4832 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4833 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4834 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4835 amdgpu_ring_commit(kiq_ring);
4836
4837 for (i = 0; i < adev->usec_timeout; i++) {
4838 tmp = RREG32(scratch);
4839 if (tmp == 0xDEADBEEF)
4840 break;
4841 DRM_UDELAY(1);
4842 }
4843 if (i >= adev->usec_timeout) {
4844 DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n",
4845 scratch, tmp);
4846 r = -EINVAL;
4847 }
4848 amdgpu_gfx_scratch_free(adev, scratch);
4849
4850 return r;
4851}
4852
4853static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4854{
4855 int i, r = 0;
4856
4857 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4858 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4859 for (i = 0; i < adev->usec_timeout; i++) {
4860 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4861 break;
4862 udelay(1);
4863 }
4864 if (i == adev->usec_timeout)
4865 r = -ETIMEDOUT;
4866 }
4867 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4868 WREG32(mmCP_HQD_PQ_RPTR, 0);
4869 WREG32(mmCP_HQD_PQ_WPTR, 0);
4870
4871 return r;
4872}
4873
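/*
 * The MQD (memory queue descriptor) built below is the in-memory image of
 * the CP_HQD_* register state for one hardware queue; it is either written
 * to the registers directly via gfx_v8_0_mqd_commit() or handed to the KIQ
 * through MAP_QUEUES so the CP can load it itself.
 */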
4874 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4875 {
4876 struct amdgpu_device *adev = ring->adev;
4877 struct vi_mqd *mqd = ring->mqd_ptr;
4878 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4879 uint32_t tmp;
4880
4881 /* init the mqd struct */
4882 memset(mqd, 0, sizeof(struct vi_mqd));
4883
4884 mqd->header = 0xC0310800;
4885 mqd->compute_pipelinestat_enable = 0x00000001;
4886 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4887 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4888 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4889 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4890 mqd->compute_misc_reserved = 0x00000003;
4891
4892 eop_base_addr = ring->eop_gpu_addr >> 8;
4893 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4894 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4895
4896 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4897 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4898 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4899 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4900
4901 mqd->cp_hqd_eop_control = tmp;
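	/*
	 * Worked example: GFX8_MEC_HPD_SIZE is 2048 bytes, i.e. 512 dwords,
	 * so EOP_SIZE = order_base_2(512) - 1 = 8 and the hardware decodes
	 * 2^(8+1) = 512 dwords, matching the allocated EOP buffer.
	 */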
4902
4903 /* enable doorbell? */
4904 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4905 CP_HQD_PQ_DOORBELL_CONTROL,
4906 DOORBELL_EN,
4907 ring->use_doorbell ? 1 : 0);
4908
4909 mqd->cp_hqd_pq_doorbell_control = tmp;
4910
4911 /* set the pointer to the MQD */
4912 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4913 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4914
4915 /* set MQD vmid to 0 */
4916 tmp = RREG32(mmCP_MQD_CONTROL);
4917 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4918 mqd->cp_mqd_control = tmp;
4919
4920 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4921 hqd_gpu_addr = ring->gpu_addr >> 8;
4922 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4923 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4924
4925 /* set up the HQD, this is similar to CP_RB0_CNTL */
4926 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4927 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4928 (order_base_2(ring->ring_size / 4) - 1));
4929 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4930 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4931#ifdef __BIG_ENDIAN
4932 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4933#endif
4934 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4935 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4936 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4937 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4938 mqd->cp_hqd_pq_control = tmp;
4939
4940 /* set the wb address whether it's enabled or not */
4941 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4942 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4943 mqd->cp_hqd_pq_rptr_report_addr_hi =
4944 upper_32_bits(wb_gpu_addr) & 0xffff;
4945
4946 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4947 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4948 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4949 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4950
4951 tmp = 0;
4952 /* enable the doorbell if requested */
4953 if (ring->use_doorbell) {
4954 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4955 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4956 DOORBELL_OFFSET, ring->doorbell_index);
4957
4958 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4959 DOORBELL_EN, 1);
4960 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4961 DOORBELL_SOURCE, 0);
4962 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4963 DOORBELL_HIT, 0);
4964 }
4965
4966 mqd->cp_hqd_pq_doorbell_control = tmp;
4967
4968 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4969 ring->wptr = 0;
4970 mqd->cp_hqd_pq_wptr = ring->wptr;
4971 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4972
4973 /* set the vmid for the queue */
4974 mqd->cp_hqd_vmid = 0;
4975
4976 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4977 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4978 mqd->cp_hqd_persistent_state = tmp;
4979
4980 /* set MTYPE */
4981 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4982 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4983 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4984 mqd->cp_hqd_ib_control = tmp;
4985
4986 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4987 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4988 mqd->cp_hqd_iq_timer = tmp;
4989
4990 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4991 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4992 mqd->cp_hqd_ctx_save_control = tmp;
4993
4994 /* defaults */
4995 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4996 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4997 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4998 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4999 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
5000 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
5001 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
5002 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
5003 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
5004 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
5005 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
5006 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
5007 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
5008 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
5009 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
5010
5011 /* activate the queue */
5012 mqd->cp_hqd_active = 1;
5013
5014 return 0;
5015}
5016
5017int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
5018 struct vi_mqd *mqd)
5019 {
5020 /* disable wptr polling */
5021 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
5022
5023 WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
5024 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
5025
5026 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
5027 WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
5028
5029 /* enable doorbell? */
5030 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
5031
5032 /* set pq read/write pointers */
5033 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
5034 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
5035 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
5036
5037 /* set the pointer to the MQD */
5038 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
5039 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
5040
5041 /* set MQD vmid to 0 */
5042 WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
5043
5044 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
5045 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
5046 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
5047
5048 /* set up the HQD, this is similar to CP_RB0_CNTL */
5049 WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
5050
5051 /* set the wb address whether it's enabled or not */
5052 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
5053 mqd->cp_hqd_pq_rptr_report_addr_lo);
5054 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5055 mqd->cp_hqd_pq_rptr_report_addr_hi);
5056
5057 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
5058 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
5059 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
5060
5061 /* enable the doorbell if requested */
5062 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
5063
5064 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5065 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
5066 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
5067 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
5068
5069 /* set the HQD priority */
5070 WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority);
5071 WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority);
5072 WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum);
5073
5074 /* set cwsr save area */
5075 WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, mqd->cp_hqd_ctx_save_base_addr_lo);
5076 WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, mqd->cp_hqd_ctx_save_base_addr_hi);
5077 WREG32(mmCP_HQD_CTX_SAVE_CONTROL, mqd->cp_hqd_ctx_save_control);
5078 WREG32(mmCP_HQD_CNTL_STACK_OFFSET, mqd->cp_hqd_cntl_stack_offset);
5079 WREG32(mmCP_HQD_CNTL_STACK_SIZE, mqd->cp_hqd_cntl_stack_size);
5080 WREG32(mmCP_HQD_WG_STATE_OFFSET, mqd->cp_hqd_wg_state_offset);
5081 WREG32(mmCP_HQD_CTX_SAVE_SIZE, mqd->cp_hqd_ctx_save_size);
5082
5083 WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control);
5084 WREG32(mmCP_HQD_EOP_EVENTS, mqd->cp_hqd_eop_done_events);
5085 WREG32(mmCP_HQD_ERROR, mqd->cp_hqd_error);
5086 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
5087 WREG32(mmCP_HQD_EOP_DONES, mqd->cp_hqd_eop_dones);
5088
5089 /* set the vmid for the queue */
5090 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
5091
5092 WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
5093
5094 /* activate the queue */
5095 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
5096
5097 return 0;
5098}
5099
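/*
 * Two paths below: on GPU reset the saved MQD backup is copied back and the
 * ring state is cleared before the queue is re-committed; on first init the
 * MQD is built from scratch, committed, and then snapshotted into
 * adev->gfx.mec.mqd_backup[] for later resets.
 */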
5100 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
5101 {
5102 int r = 0;
5103 struct amdgpu_device *adev = ring->adev;
5104 struct vi_mqd *mqd = ring->mqd_ptr;
5105 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
5106
5107 gfx_v8_0_kiq_setting(ring);
5108
5109 if (adev->gfx.in_reset) { /* for GPU_RESET case */
5110 /* reset MQD to a clean status */
5111 if (adev->gfx.mec.mqd_backup[mqd_idx])
5112 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
5113
5114 /* reset ring buffer */
5115 ring->wptr = 0;
5116 amdgpu_ring_clear_ring(ring);
5117 mutex_lock(&adev->srbm_mutex);
5118 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5119 r = gfx_v8_0_deactivate_hqd(adev, 1);
5120 if (r) {
5121 dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
5122 goto out_unlock;
5123 }
5124 gfx_v8_0_mqd_commit(adev, mqd);
5125 vi_srbm_select(adev, 0, 0, 0, 0);
5126 mutex_unlock(&adev->srbm_mutex);
5127 } else {
5128 mutex_lock(&adev->srbm_mutex);
5129 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5130 gfx_v8_0_mqd_init(ring);
5131 r = gfx_v8_0_deactivate_hqd(adev, 1);
5132 if (r) {
5133 dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
5134 goto out_unlock;
5135 }
5136 gfx_v8_0_mqd_commit(adev, mqd);
5137 vi_srbm_select(adev, 0, 0, 0, 0);
5138 mutex_unlock(&adev->srbm_mutex);
5139
5140 if (adev->gfx.mec.mqd_backup[mqd_idx])
5141 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
5142 }
5143
5144 return r;
5145
5146out_unlock:
5147 vi_srbm_select(adev, 0, 0, 0, 0);
5148 mutex_unlock(&adev->srbm_mutex);
5149 return r;
5150}
5151
5152static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
5153{
5154 struct amdgpu_device *adev = ring->adev;
5155 struct vi_mqd *mqd = ring->mqd_ptr;
5156 int mqd_idx = ring - &adev->gfx.compute_ring[0];
5157
5158 if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
5159 mutex_lock(&adev->srbm_mutex);
5160 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5161 gfx_v8_0_mqd_init(ring);
5162 vi_srbm_select(adev, 0, 0, 0, 0);
5163 mutex_unlock(&adev->srbm_mutex);
5164
5165 if (adev->gfx.mec.mqd_backup[mqd_idx])
5166 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
5167 } else if (adev->gfx.in_reset) { /* for GPU_RESET case */
5168 /* reset MQD to a clean status */
5169 if (adev->gfx.mec.mqd_backup[mqd_idx])
5170 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
5171 /* reset ring buffer */
5172 ring->wptr = 0;
5173 amdgpu_ring_clear_ring(ring);
5174 } else {
5175 amdgpu_ring_clear_ring(ring);
5176 }
5177 return 0;
5178}
5179
5180static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
5181{
5182 if (adev->asic_type > CHIP_TONGA) {
5183 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
5184 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
5185 }
5186 /* enable doorbells */
5187 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
5188}
5189
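/*
 * Resume ordering: start the compute MEs, initialize and commit the KIQ's
 * own queue, initialize every KCQ's MQD, program the MEC doorbell range,
 * then let the KIQ map all KCQs and finally ring-test everything.
 */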
5190 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
5191{
5192 struct amdgpu_ring *ring = NULL;
5193 int r = 0, i;
5194
5195 gfx_v8_0_cp_compute_enable(adev, true);
5196
5197 ring = &adev->gfx.kiq.ring;
5198
5199 r = amdgpu_bo_reserve(ring->mqd_obj, false);
5200 if (unlikely(r != 0))
5201 goto done;
5202
5203 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
5204 if (!r) {
5205 r = gfx_v8_0_kiq_init_queue(ring);
5206 amdgpu_bo_kunmap(ring->mqd_obj);
5207 ring->mqd_ptr = NULL;
5208 }
5209 amdgpu_bo_unreserve(ring->mqd_obj);
5210 if (r)
5211 goto done;
5212
5213 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5214 ring = &adev->gfx.compute_ring[i];
5215
5216 r = amdgpu_bo_reserve(ring->mqd_obj, false);
5217 if (unlikely(r != 0))
5218 goto done;
5219 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
5220 if (!r) {
5221 r = gfx_v8_0_kcq_init_queue(ring);
5222 amdgpu_bo_kunmap(ring->mqd_obj);
5223 ring->mqd_ptr = NULL;
5224 }
5225 amdgpu_bo_unreserve(ring->mqd_obj);
5226 if (r)
5227 goto done;
5228 }
5229
5230 gfx_v8_0_set_mec_doorbell_range(adev);
5231
5232 r = gfx_v8_0_kiq_kcq_enable(adev);
5233 if (r)
5234 goto done;
5235
5236 /* Test KIQ */
5237 ring = &adev->gfx.kiq.ring;
5238 ring->ready = true;
5239 r = amdgpu_ring_test_ring(ring);
5240 if (r) {
5241 ring->ready = false;
5242 goto done;
5243 }
5244
5245 /* Test KCQs */
5246 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5247 ring = &adev->gfx.compute_ring[i];
5248 ring->ready = true;
5249 r = amdgpu_ring_test_ring(ring);
5250 if (r)
5251 ring->ready = false;
5252 }
5253
5254done:
5255 return r;
5256}
5257
5258static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5259{
5260 int r;
5261
5262 if (!(adev->flags & AMD_IS_APU))
5263 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5264
5265 if (!adev->pp_enabled) {
5266 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
5267 /* legacy firmware loading */
5268 r = gfx_v8_0_cp_gfx_load_microcode(adev);
5269 if (r)
5270 return r;
5271
5272 r = gfx_v8_0_cp_compute_load_microcode(adev);
5273 if (r)
5274 return r;
5275 } else {
5276 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5277 AMDGPU_UCODE_ID_CP_CE);
5278 if (r)
5279 return -EINVAL;
5280
5281 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5282 AMDGPU_UCODE_ID_CP_PFP);
5283 if (r)
5284 return -EINVAL;
5285
5286 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5287 AMDGPU_UCODE_ID_CP_ME);
5288 if (r)
5289 return -EINVAL;
5290
5291 if (adev->asic_type == CHIP_TOPAZ) {
5292 r = gfx_v8_0_cp_compute_load_microcode(adev);
5293 if (r)
5294 return r;
5295 } else {
5296 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5297 AMDGPU_UCODE_ID_CP_MEC1);
5298 if (r)
5299 return -EINVAL;
5300 }
5301 }
5302 }
5303
5304 r = gfx_v8_0_cp_gfx_resume(adev);
5305 if (r)
5306 return r;
5307
5308 r = gfx_v8_0_kiq_resume(adev);
5309 if (r)
5310 return r;
5311
5312 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5313
5314 return 0;
5315}
5316
5317static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5318{
5319 gfx_v8_0_cp_gfx_enable(adev, enable);
5320 gfx_v8_0_cp_compute_enable(adev, enable);
5321}
5322
5323 static int gfx_v8_0_hw_init(void *handle)
5324{
5325 int r;
5326 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5327
5328 gfx_v8_0_init_golden_registers(adev);
5329 gfx_v8_0_gpu_init(adev);
5330
5331 r = gfx_v8_0_rlc_resume(adev);
5332 if (r)
5333 return r;
5334
5335 r = gfx_v8_0_cp_resume(adev);
5336
5337 return r;
5338}
5339
5340 static int gfx_v8_0_hw_fini(void *handle)
5341 {
5342 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5343
5344 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5345 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5346 if (amdgpu_sriov_vf(adev)) {
5347 pr_debug("For SRIOV client, shouldn't do anything.\n");
5348 return 0;
5349 }
5350 gfx_v8_0_kiq_kcq_disable(adev);
5351 gfx_v8_0_cp_enable(adev, false);
5352 gfx_v8_0_rlc_stop(adev);
5353
5354 amdgpu_set_powergating_state(adev,
5355 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5356
5357 return 0;
5358}
5359
5360 static int gfx_v8_0_suspend(void *handle)
5361 {
5362 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5363 adev->gfx.in_suspend = true;
5364 return gfx_v8_0_hw_fini(adev);
5365}
5366
5367 static int gfx_v8_0_resume(void *handle)
5368 {
5369 int r;
5370 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5371
5372 r = gfx_v8_0_hw_init(adev);
5373 adev->gfx.in_suspend = false;
5374 return r;
5375}
5376
5377 static bool gfx_v8_0_is_idle(void *handle)
5378 {
5379 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5380
5381 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5382 return false;
5383 else
5384 return true;
5385}
5386
5387 static int gfx_v8_0_wait_for_idle(void *handle)
5388{
5389 unsigned i;
5390 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5391
5392 for (i = 0; i < adev->usec_timeout; i++) {
5393 if (gfx_v8_0_is_idle(handle))
5394 return 0;
5395
5396 udelay(1);
5397 }
5398 return -ETIMEDOUT;
5399}
5400
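/*
 * gfx_v8_0_check_soft_reset() only computes and caches the required
 * GRBM/SRBM reset fields in adev->gfx.{grbm,srbm}_soft_reset; the actual
 * reset is performed later by the pre_soft_reset/soft_reset/post_soft_reset
 * stages below.
 */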
5401 static bool gfx_v8_0_check_soft_reset(void *handle)
5402 {
5403 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5404 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5405 u32 tmp;
5406
5407 /* GRBM_STATUS */
5408 tmp = RREG32(mmGRBM_STATUS);
5409 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5410 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5411 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5412 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5413 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5414 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5415 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5416 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5417 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5418 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5419 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5420 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5421 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5422 }
5423
5424 /* GRBM_STATUS2 */
5425 tmp = RREG32(mmGRBM_STATUS2);
5426 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5427 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5428 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5429
5430 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5431 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5432 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5433 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5434 SOFT_RESET_CPF, 1);
5435 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5436 SOFT_RESET_CPC, 1);
5437 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5438 SOFT_RESET_CPG, 1);
5439 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5440 SOFT_RESET_GRBM, 1);
5441 }
5442
5443 /* SRBM_STATUS */
5444 tmp = RREG32(mmSRBM_STATUS);
5445 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5446 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5447 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5448 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5449 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5450 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5451
5452 if (grbm_soft_reset || srbm_soft_reset) {
5453 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5454 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5455 return true;
5456 } else {
5457 adev->gfx.grbm_soft_reset = 0;
5458 adev->gfx.srbm_soft_reset = 0;
5459 return false;
5460 }
5461 }
5462
5463static int gfx_v8_0_pre_soft_reset(void *handle)
5464{
5465 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5466 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5467
5468 if ((!adev->gfx.grbm_soft_reset) &&
5469 (!adev->gfx.srbm_soft_reset))
5470 return 0;
5471
5472 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5473 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5474
5475 /* stop the rlc */
5476 gfx_v8_0_rlc_stop(adev);
5477
5478 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5479 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5480 /* Disable GFX parsing/prefetching */
5481 gfx_v8_0_cp_gfx_enable(adev, false);
5482
5483 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5484 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5485 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5486 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5487 int i;
5488
5489 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5490 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5491
5492 mutex_lock(&adev->srbm_mutex);
5493 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5494 gfx_v8_0_deactivate_hqd(adev, 2);
5495 vi_srbm_select(adev, 0, 0, 0, 0);
5496 mutex_unlock(&adev->srbm_mutex);
5497 }
5498 /* Disable MEC parsing/prefetching */
5499 gfx_v8_0_cp_compute_enable(adev, false);
5500 }
5501
5502 return 0;
5503}
5504
5505static int gfx_v8_0_soft_reset(void *handle)
5506{
5507 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5508 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5509 u32 tmp;
5510
5511 if ((!adev->gfx.grbm_soft_reset) &&
5512 (!adev->gfx.srbm_soft_reset))
5513 return 0;
5514
5515 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5516 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5517
5518 if (grbm_soft_reset || srbm_soft_reset) {
5519 tmp = RREG32(mmGMCON_DEBUG);
5520 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5521 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5522 WREG32(mmGMCON_DEBUG, tmp);
5523 udelay(50);
5524 }
5525
5526 if (grbm_soft_reset) {
5527 tmp = RREG32(mmGRBM_SOFT_RESET);
5528 tmp |= grbm_soft_reset;
5529 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5530 WREG32(mmGRBM_SOFT_RESET, tmp);
5531 tmp = RREG32(mmGRBM_SOFT_RESET);
5532
5533 udelay(50);
5534
5535 tmp &= ~grbm_soft_reset;
5536 WREG32(mmGRBM_SOFT_RESET, tmp);
5537 tmp = RREG32(mmGRBM_SOFT_RESET);
5538 }
5539
5540 if (srbm_soft_reset) {
5541 tmp = RREG32(mmSRBM_SOFT_RESET);
5542 tmp |= srbm_soft_reset;
5543 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5544 WREG32(mmSRBM_SOFT_RESET, tmp);
5545 tmp = RREG32(mmSRBM_SOFT_RESET);
5546
5547 udelay(50);
5548
5549 tmp &= ~srbm_soft_reset;
5550 WREG32(mmSRBM_SOFT_RESET, tmp);
5551 tmp = RREG32(mmSRBM_SOFT_RESET);
5552 }
5553
5554 if (grbm_soft_reset || srbm_soft_reset) {
5555 tmp = RREG32(mmGMCON_DEBUG);
5556 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5557 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5558 WREG32(mmGMCON_DEBUG, tmp);
5559 }
5560
5561 /* Wait a little for things to settle down */
5562 udelay(50);
5563
5564 return 0;
5565}
5566
5567static int gfx_v8_0_post_soft_reset(void *handle)
5568{
5569 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5570 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5571
5572 if ((!adev->gfx.grbm_soft_reset) &&
5573 (!adev->gfx.srbm_soft_reset))
5574 return 0;
5575
5576 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5577 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5578
5579 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5580 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5581 gfx_v8_0_cp_gfx_resume(adev);
5582
5583 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5584 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5585 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5586 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5587 int i;
5588
5589 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5590 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5591
5592 mutex_lock(&adev->srbm_mutex);
5593 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5594 gfx_v8_0_deactivate_hqd(adev, 2);
5595 vi_srbm_select(adev, 0, 0, 0, 0);
5596 mutex_unlock(&adev->srbm_mutex);
5597 }
5598 gfx_v8_0_kiq_resume(adev);
5599 }
5600 gfx_v8_0_rlc_start(adev);
5601
5602 return 0;
5603}
5604
5605/**
5606 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5607 *
5608 * @adev: amdgpu_device pointer
5609 *
5610 * Fetches a GPU clock counter snapshot.
5611 * Returns the 64 bit clock counter snapshot.
5612 */
5613 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5614{
5615 uint64_t clock;
5616
5617 mutex_lock(&adev->gfx.gpu_clock_mutex);
5618 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5619 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5620 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5621 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5622 return clock;
5623}
5624
5625static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5626 uint32_t vmid,
5627 uint32_t gds_base, uint32_t gds_size,
5628 uint32_t gws_base, uint32_t gws_size,
5629 uint32_t oa_base, uint32_t oa_size)
5630{
5631 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5632 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5633
5634 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5635 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5636
5637 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5638 oa_size = oa_size >> AMDGPU_OA_SHIFT;
5639
5640 /* GDS Base */
5641 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5642 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5643 WRITE_DATA_DST_SEL(0)));
5644 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5645 amdgpu_ring_write(ring, 0);
5646 amdgpu_ring_write(ring, gds_base);
5647
5648 /* GDS Size */
5649 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5650 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5651 WRITE_DATA_DST_SEL(0)));
5652 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5653 amdgpu_ring_write(ring, 0);
5654 amdgpu_ring_write(ring, gds_size);
5655
5656 /* GWS */
5657 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5658 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5659 WRITE_DATA_DST_SEL(0)));
5660 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5661 amdgpu_ring_write(ring, 0);
5662 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5663
5664 /* OA */
5665 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5666 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5667 WRITE_DATA_DST_SEL(0)));
5668 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5669 amdgpu_ring_write(ring, 0);
5670 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5671}
5672
5673static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5674{
5675 WREG32(mmSQ_IND_INDEX,
5676 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5677 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5678 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5679 (SQ_IND_INDEX__FORCE_READ_MASK));
5680 return RREG32(mmSQ_IND_DATA);
5681}
5682
5683static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5684 uint32_t wave, uint32_t thread,
5685 uint32_t regno, uint32_t num, uint32_t *out)
5686{
5687 WREG32(mmSQ_IND_INDEX,
5688 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5689 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5690 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5691 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5692 (SQ_IND_INDEX__FORCE_READ_MASK) |
5693 (SQ_IND_INDEX__AUTO_INCR_MASK));
5694 while (num--)
5695 *(out++) = RREG32(mmSQ_IND_DATA);
5696}
5697
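/*
 * wave_read_regs() above relies on the AUTO_INCR mode of SQ_IND_INDEX:
 * once the index register selects the wave/simd/thread and starting regno,
 * every read of SQ_IND_DATA returns the next register, letting the loop
 * stream 'num' consecutive values.
 */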
5698static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5699{
5700 /* type 0 wave data */
5701 dst[(*no_fields)++] = 0;
5702 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5703 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5704 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5705 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5706 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5707 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5708 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5709 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5710 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5711 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5712 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5713 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5714 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5715 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5716 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5717 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5718 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5719 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5720}
5721
5722static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5723 uint32_t wave, uint32_t start,
5724 uint32_t size, uint32_t *dst)
5725{
5726 wave_read_regs(
5727 adev, simd, wave, 0,
5728 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5729}
5730
5731
5732static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5733 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5734 .select_se_sh = &gfx_v8_0_select_se_sh,
5735 .read_wave_data = &gfx_v8_0_read_wave_data,
5736 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5737};
5738
5739 static int gfx_v8_0_early_init(void *handle)
5740 {
5741 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5742
5743 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5744 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5745 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5746 gfx_v8_0_set_ring_funcs(adev);
5747 gfx_v8_0_set_irq_funcs(adev);
5748 gfx_v8_0_set_gds_init(adev);
5749 gfx_v8_0_set_rlc_funcs(adev);
5750
5751 return 0;
5752}
5753
5754static int gfx_v8_0_late_init(void *handle)
5755{
5756 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5757 int r;
5758
5759 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5760 if (r)
5761 return r;
5762
5763 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5764 if (r)
5765 return r;
5766
5767 /* requires IBs so do in late init after IB pool is initialized */
5768 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5769 if (r)
5770 return r;
5771
5772 amdgpu_set_powergating_state(adev,
5773 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5774
5775 return 0;
5776}
5777
5778static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5779 bool enable)
5780 {
5781 if ((adev->asic_type == CHIP_POLARIS11) ||
5782 (adev->asic_type == CHIP_POLARIS12))
5783 /* Send msg to SMU via Powerplay */
5784 amdgpu_set_powergating_state(adev,
5785 AMD_IP_BLOCK_TYPE_SMC,
5786 enable ?
5787 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5788
5789 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5790}
5791
5792static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5793 bool enable)
5794 {
5795 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5796}
5797
5798 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5799 bool enable)
5800{
5801 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5802}
5803
5804static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5805 bool enable)
5806{
5807 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5808}
5809
5810static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5811 bool enable)
5812{
61cb8cef 5813 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
2c547165
AD
5814
5815 /* Read any GFX register to wake up GFX. */
5816 if (!enable)
61cb8cef 5817 RREG32(mmDB_RENDER_CONTROL);
2c547165
AD
5818}
5819
5820static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5821 bool enable)
5822{
5823 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5824 cz_enable_gfx_cg_power_gating(adev, true);
5825 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5826 cz_enable_gfx_pipeline_power_gating(adev, true);
5827 } else {
5828 cz_enable_gfx_cg_power_gating(adev, false);
5829 cz_enable_gfx_pipeline_power_gating(adev, false);
5830 }
5831}
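
/*
 * Power gating dispatch below: Carrizo/Stoney drive GFX PG through the
 * RLC registers directly, while Polaris11/12 static MG PG additionally
 * goes through an SMU message (see
 * gfx_v8_0_enable_gfx_static_mg_power_gating above).
 */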

static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}

	return 0;
}

static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}

static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
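
/*
 * The serdes write above is a broadcast: every SE/SH is selected, both
 * the CU and non-CU master masks are fully set, and BPM_ADDR 0xff
 * addresses all BPMs, so a single command reaches each per-block
 * clock-gating controller in one shot.
 */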

#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
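
/*
 * RLC safe-mode handshake: write CMD plus the enter message into
 * mmRLC_SAFE_MODE, wait for the GFX clock/power status bits in
 * mmRLC_GPM_STAT to report the GPU awake, then poll until the RLC
 * clears the CMD bit to acknowledge the request.
 */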
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}

static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};

static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * === MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * === CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}
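
/*
 * For the SMU-managed paths below, PP_CG_MSG_ID packs the target group
 * and block together with the supported modes (PP_STATE_SUPPORT_CG/LS)
 * and the requested state into a single message for the powerplay
 * layer; ungating is requested by sending a zero state.
 */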

static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
						  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
	void *pp_handle = adev->powerplay.pp_handle;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	return 0;
}

static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
						    enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
	void *pp_handle = adev->powerplay.pp_handle;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_3D,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_RLC,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CP,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	return 0;
}

static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}

static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}

static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		/* XXX check if swapping is necessary on BE */
		return ring->adev->wb.wb[ring->wptr_offs];
	else
		return RREG32(mmCP_RB0_WPTR);
}

static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
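
/*
 * On the doorbell path above, the new write pointer is mirrored into
 * the writeback buffer and then rung through the doorbell aperture, so
 * the CP picks it up without an MMIO register write; the fallback path
 * writes mmCP_RB0_WPTR and reads it back to flush the posted write.
 */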

static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
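
/*
 * The WAIT_REG_MEM above runs in write/wait/write mode: the CP writes
 * the per-ring mask to GPU_HDP_FLUSH_REQ, polls GPU_HDP_FLUSH_DONE
 * every 0x20 clocks until the same bits come back, then issues the
 * final write, so HDP has flushed before any later packet executes.
 */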

static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}

static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);
}

static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
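
/*
 * The EOP event above fires only after all prior work has drained the
 * pipeline: the CP flushes and writes back the TC caches, writes the
 * fence sequence number to "addr" (32 or 64 bit per DATA_SEL), and
 * raises an interrupt when INT_SEL asks for one.
 */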

static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
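
/*
 * VM flush sequence above: update the per-VMID page table base register
 * (VMIDs 0-7 and 8-15 live in separate register banks), write that
 * VMID's bit to VM_INVALIDATE_REQUEST, then issue a WAIT_REG_MEM read
 * of the register so the invalidate is ordered before later packets.
 */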

static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time a preamble
		 * is presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
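
/*
 * Worked example for the patch above (values are illustrative): with
 * the dummy count at offset 100 and the ring's masked wptr now at 110,
 * cur = 109 and the COND_EXEC count becomes 109 - 100 = 9 dwords; if
 * the wptr wrapped past the end of the buffer, the else branch adds
 * the ring size (in dwords) back in so the distance stays positive.
 */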

static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	/* Me 0 is reserved for graphics */
	if (me < 1 || me > adev->gfx.mec.num_mec) {
		DRM_ERROR("Ignoring request to enable interrupts for invalid me:%d\n", me);
		return;
	}

	if (pipe >= adev->gfx.mec.num_pipe_per_mec) {
		DRM_ERROR("Ignoring request to enable interrupts for invalid "
			  "me:%d pipe:%d\n", me, pipe);
		return;
	}

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, me, pipe, 0, 0);

	WREG32_FIELD(CPC_INT_CNTL, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

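/*
 * IH cookie layout used below: ring_id bits [1:0] = pipe, [3:2] = me,
 * [6:4] = queue. For example, a ring_id of 0x16 decodes to me 1,
 * pipe 2, queue 1.
 */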
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* the KIQ only supports GENERIC2_INT for now */
		break;
	}
	return 0;
}

static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		  me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if counting 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
			the first COND_EXEC jumps to the place just
			prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
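
/*
 * The hardware reports *inactive* CUs (fuse plus user settings), so
 * the active bitmap is the complement clipped to max_cu_per_sh. For
 * example, with 8 CUs per SH and INACTIVE_CUS = 0x03, the active
 * bitmap comes out as 0xfc.
 */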

static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	static union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}

static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	static union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}
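
/*
 * Both meta-data writes above target the CSA, carved out two pages
 * below the top of the reserved VA range here, with the GDS backup one
 * page above the CSA base. The CSA is what world-switch/preemption
 * uses to save and restore CE/DE state under SR-IOV - that usage is
 * inferred from the PREEMPT/virt checks, not spelled out in this file.
 */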

/* create MQD for each compute queue */
static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r, i;

	/* create MQD for KIQ */
	ring = &adev->gfx.kiq.ring;
	if (!ring->mqd_obj) {
		r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
					    &ring->mqd_gpu_addr, &ring->mqd_ptr);
		if (r) {
			dev_warn(adev->dev, "failed to create ring mqd obj (%d)", r);
			return r;
		}

		/* prepare MQD backup */
		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
			dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
	}

	/* create MQD for each KCQ */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		if (!ring->mqd_obj) {
			r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
			if (r) {
				dev_warn(adev->dev, "failed to create ring mqd obj (%d)", r);
				return r;
			}

			/* prepare MQD backup */
			adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
			if (!adev->gfx.mec.mqd_backup[i])
				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
		}
	}

	return 0;
}
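
/*
 * The kmalloc'ed shadow above is a CPU-side copy of each MQD; the
 * queue setup/teardown paths use it to restore queue state (e.g.
 * across reset or suspend) without re-deriving the MQD - an inference
 * from its role as a "backup", not spelled out in this function.
 */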

static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int i;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		kfree(adev->gfx.mec.mqd_backup[i]);
		amdgpu_bo_free_kernel(&ring->mqd_obj,
				      &ring->mqd_gpu_addr,
				      &ring->mqd_ptr);
	}

	ring = &adev->gfx.kiq.ring;
	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
	amdgpu_bo_free_kernel(&ring->mqd_obj,
			      &ring->mqd_gpu_addr,
			      &ring->mqd_ptr);
}