drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK  0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK  0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD 1
#define CLE_BPM_SERDES_CMD 0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength 14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

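/*
 * The golden register tables below are (register, AND mask, OR value)
 * triples, applied by amdgpu_program_register_sequence() from
 * gfx_v8_0_init_golden_registers().
 */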
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);

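/* Apply the per-ASIC clock gating init and golden register sequences. */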
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

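/* Carve out the CP scratch registers used by the ring and IB tests. */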
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
		adev->gfx.scratch.free[i] = true;
		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
	}
}

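/*
 * Basic ring test: push a write of 0xDEADBEEF to a scratch register through
 * the ring and poll the register until the value lands, proving the CP is
 * fetching and executing packets.
 */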
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

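/*
 * IB test: the same scratch register write as the ring test, but submitted
 * through an indirect buffer and synchronized with the returned fence.
 */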
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto err2;
	}
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
			 ring->idx, i);
		goto err2;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

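/* Drop the firmware images requested by gfx_v8_0_init_microcode(). */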
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

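/*
 * Fetch and validate the PFP, ME, CE, RLC and MEC microcode for the current
 * ASIC and, when the SMU loads firmware, register each image in
 * adev->firmware.ucode[].
 */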
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.smu_load) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}

	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

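/* Build the clear state indirect buffer contents from the RLC cs_data sections. */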
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x0000002A);
		break;
	case CHIP_POLARIS11:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_FIJI:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		buffer[count++] = cpu_to_le32(0x00000002);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_STONEY:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);

		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}
}

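/* Allocate, pin and fill the RLC clear state buffer object in VRAM. */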
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}

#define MEC_HPD_SIZE 2048

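/* Allocate, pin and zero the GTT buffer that backs the MEC HPD EOP storage. */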
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

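/*
 * Hand-assembled compute shaders and dispatch register state used by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize the VGPRs and SGPRs.
 */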
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};

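/*
 * Carrizo-only EDC workaround: build an IB that dispatches the VGPR and SGPR
 * init shaders above so the GPRs get written with EDC temporarily disabled.
 */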
1458static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1459{
1460 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1461 struct amdgpu_ib ib;
1462 struct fence *f = NULL;
1463 int r, i;
1464 u32 tmp;
1465 unsigned total_size, vgpr_offset, sgpr_offset;
1466 u64 gpu_addr;
1467
1468 /* only supported on CZ */
1469 if (adev->asic_type != CHIP_CARRIZO)
1470 return 0;
1471
1472 /* bail if the compute ring is not ready */
1473 if (!ring->ready)
1474 return 0;
1475
1476 tmp = RREG32(mmGB_EDC_MODE);
1477 WREG32(mmGB_EDC_MODE, 0);
1478
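	/* Size the IB: each {reg, value} pair becomes a 3-dword SET_SH_REG
	 * packet, plus 4 dwords for COMPUTE_PGM_LO/HI, 5 for the
	 * DISPATCH_DIRECT packet and 2 for the CS partial flush event,
	 * converted from dwords to bytes (x4).
	 */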
1479 total_size =
1480 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1481 total_size +=
1482 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1483 total_size +=
1484 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1485 total_size = ALIGN(total_size, 256);
1486 vgpr_offset = total_size;
1487 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1488 sgpr_offset = total_size;
1489 total_size += sizeof(sgpr_init_compute_shader);
1490
1491 /* allocate an indirect buffer to put the commands in */
1492 memset(&ib, 0, sizeof(ib));
1493 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1494 if (r) {
1495 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1496 return r;
1497 }
1498
1499 /* load the compute shaders */
1500 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1501 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1502
1503 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1504 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1505
1506 /* init the ib length to 0 */
1507 ib.length_dw = 0;
1508
1509 /* VGPR */
1510 /* write the register state for the compute dispatch */
1511 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1512 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1513 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1514 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1515 }
1516 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
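	/* the CP takes the shader start address in 256-byte units, hence >> 8 */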
1517 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1518 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1519 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1520 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1521 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1522
1523 /* write dispatch packet */
1524 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1525 ib.ptr[ib.length_dw++] = 8; /* x */
1526 ib.ptr[ib.length_dw++] = 1; /* y */
1527 ib.ptr[ib.length_dw++] = 1; /* z */
1528 ib.ptr[ib.length_dw++] =
1529 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1530
1531 /* write CS partial flush packet */
1532 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1533 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1534
1535 /* SGPR1 */
1536 /* write the register state for the compute dispatch */
1537 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1538 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1539 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1540 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1541 }
1542 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1543 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1544 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1545 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1546 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1547 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1548
1549 /* write dispatch packet */
1550 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1551 ib.ptr[ib.length_dw++] = 8; /* x */
1552 ib.ptr[ib.length_dw++] = 1; /* y */
1553 ib.ptr[ib.length_dw++] = 1; /* z */
1554 ib.ptr[ib.length_dw++] =
1555 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1556
1557 /* write CS partial flush packet */
1558 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1559 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1560
1561 /* SGPR2 */
1562 /* write the register state for the compute dispatch */
1563 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1564 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1565 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1566 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1567 }
1568 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1569 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1570 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1571 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1572 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1573 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1574
1575 /* write dispatch packet */
1576 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1577 ib.ptr[ib.length_dw++] = 8; /* x */
1578 ib.ptr[ib.length_dw++] = 1; /* y */
1579 ib.ptr[ib.length_dw++] = 1; /* z */
1580 ib.ptr[ib.length_dw++] =
1581 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1582
1583 /* write CS partial flush packet */
1584 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1585 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1586
1587 /* schedule the ib on the ring */
1588 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1589 if (r) {
1590 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1591 goto fail;
1592 }
1593
1594 /* wait for the GPU to finish processing the IB */
1595 r = fence_wait(f, false);
1596 if (r) {
1597 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1598 goto fail;
1599 }
1600
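	/* GPR init is done: re-arm EDC by setting DED_MODE and PROP_FED on
	 * top of the GB_EDC_MODE value saved above */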
1601 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1602 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1603 WREG32(mmGB_EDC_MODE, tmp);
1604
1605 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1606 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1607 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1608
1609
1610 /* read back registers to clear the counters */
1611 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1612 RREG32(sec_ded_counter_registers[i]);
1613
1614fail:
1615 fence_put(f);
1616 amdgpu_ib_free(adev, &ib, NULL);
1617 fence_put(f);
1618
1619 return r;
1620}
1621
1622 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1623{
1624 u32 gb_addr_config;
1625 u32 mc_shared_chmap, mc_arb_ramcfg;
1626 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1627 u32 tmp;
1628 int ret;
1629
1630 switch (adev->asic_type) {
1631 case CHIP_TOPAZ:
1632 adev->gfx.config.max_shader_engines = 1;
1633 adev->gfx.config.max_tile_pipes = 2;
1634 adev->gfx.config.max_cu_per_sh = 6;
1635 adev->gfx.config.max_sh_per_se = 1;
1636 adev->gfx.config.max_backends_per_se = 2;
1637 adev->gfx.config.max_texture_channel_caches = 2;
1638 adev->gfx.config.max_gprs = 256;
1639 adev->gfx.config.max_gs_threads = 32;
1640 adev->gfx.config.max_hw_contexts = 8;
1641
1642 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1643 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1644 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1645 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1646 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1647 break;
1648 case CHIP_FIJI:
1649 adev->gfx.config.max_shader_engines = 4;
1650 adev->gfx.config.max_tile_pipes = 16;
1651 adev->gfx.config.max_cu_per_sh = 16;
1652 adev->gfx.config.max_sh_per_se = 1;
1653 adev->gfx.config.max_backends_per_se = 4;
1654 adev->gfx.config.max_texture_channel_caches = 16;
1655 adev->gfx.config.max_gprs = 256;
1656 adev->gfx.config.max_gs_threads = 32;
1657 adev->gfx.config.max_hw_contexts = 8;
1658
1659 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1660 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1661 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1662 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1663 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1664 break;
1665 case CHIP_POLARIS11:
1666 ret = amdgpu_atombios_get_gfx_info(adev);
1667 if (ret)
1668 return ret;
1669 adev->gfx.config.max_gprs = 256;
1670 adev->gfx.config.max_gs_threads = 32;
1671 adev->gfx.config.max_hw_contexts = 8;
1672
1673 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1674 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1675 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1676 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1677 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1678 break;
1679 case CHIP_POLARIS10:
1680 ret = amdgpu_atombios_get_gfx_info(adev);
1681 if (ret)
1682 return ret;
1683 adev->gfx.config.max_gprs = 256;
1684 adev->gfx.config.max_gs_threads = 32;
1685 adev->gfx.config.max_hw_contexts = 8;
1686
1687 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1688 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1689 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1690 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1691 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1692 break;
1693 case CHIP_TONGA:
1694 adev->gfx.config.max_shader_engines = 4;
1695 adev->gfx.config.max_tile_pipes = 8;
1696 adev->gfx.config.max_cu_per_sh = 8;
1697 adev->gfx.config.max_sh_per_se = 1;
1698 adev->gfx.config.max_backends_per_se = 2;
1699 adev->gfx.config.max_texture_channel_caches = 8;
1700 adev->gfx.config.max_gprs = 256;
1701 adev->gfx.config.max_gs_threads = 32;
1702 adev->gfx.config.max_hw_contexts = 8;
1703
1704 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1705 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1706 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1707 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1708 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1709 break;
1710 case CHIP_CARRIZO:
1711 adev->gfx.config.max_shader_engines = 1;
1712 adev->gfx.config.max_tile_pipes = 2;
1713 adev->gfx.config.max_sh_per_se = 1;
1714 adev->gfx.config.max_backends_per_se = 2;
1715
1716 switch (adev->pdev->revision) {
1717 case 0xc4:
1718 case 0x84:
1719 case 0xc8:
1720 case 0xcc:
1721 case 0xe1:
1722 case 0xe3:
1723 /* B10 */
1724 adev->gfx.config.max_cu_per_sh = 8;
1725 break;
1726 case 0xc5:
1727 case 0x81:
1728 case 0x85:
1729 case 0xc9:
1730 case 0xcd:
1731 case 0xe2:
1732 case 0xe4:
1733 /* B8 */
1734 adev->gfx.config.max_cu_per_sh = 6;
1735 break;
1736 case 0xc6:
1737 case 0xca:
1738 case 0xce:
1739 case 0x88:
1740 /* B6 */
1741 adev->gfx.config.max_cu_per_sh = 6;
1742 break;
1743 case 0xc7:
1744 case 0x87:
1745 case 0xcb:
1746 case 0xe5:
1747 case 0x89:
1748 default:
1749 /* B4 */
1750 adev->gfx.config.max_cu_per_sh = 4;
1751 break;
1752 }
1753
1754 adev->gfx.config.max_texture_channel_caches = 2;
1755 adev->gfx.config.max_gprs = 256;
1756 adev->gfx.config.max_gs_threads = 32;
1757 adev->gfx.config.max_hw_contexts = 8;
1758
1759 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1760 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1761 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1762 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1763 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1764 break;
1765 case CHIP_STONEY:
1766 adev->gfx.config.max_shader_engines = 1;
1767 adev->gfx.config.max_tile_pipes = 2;
1768 adev->gfx.config.max_sh_per_se = 1;
1769 adev->gfx.config.max_backends_per_se = 1;
1770
1771 switch (adev->pdev->revision) {
1772 case 0xc0:
1773 case 0xc1:
1774 case 0xc2:
1775 case 0xc4:
1776 case 0xc8:
1777 case 0xc9:
1778 adev->gfx.config.max_cu_per_sh = 3;
1779 break;
1780 case 0xd0:
1781 case 0xd1:
1782 case 0xd2:
1783 default:
1784 adev->gfx.config.max_cu_per_sh = 2;
1785 break;
1786 }
1787
1788 adev->gfx.config.max_texture_channel_caches = 2;
1789 adev->gfx.config.max_gprs = 256;
1790 adev->gfx.config.max_gs_threads = 16;
1791 adev->gfx.config.max_hw_contexts = 8;
1792
1793 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1794 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1795 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1796 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1797 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1798 break;
1799 default:
1800 adev->gfx.config.max_shader_engines = 2;
1801 adev->gfx.config.max_tile_pipes = 4;
1802 adev->gfx.config.max_cu_per_sh = 2;
1803 adev->gfx.config.max_sh_per_se = 1;
1804 adev->gfx.config.max_backends_per_se = 2;
1805 adev->gfx.config.max_texture_channel_caches = 4;
1806 adev->gfx.config.max_gprs = 256;
1807 adev->gfx.config.max_gs_threads = 32;
1808 adev->gfx.config.max_hw_contexts = 8;
1809
1810 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1811 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1812 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1813 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1814 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1815 break;
1816 }
1817
1818 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1819 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1820 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1821
1822 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1823 adev->gfx.config.mem_max_burst_length_bytes = 256;
1824 if (adev->flags & AMD_IS_APU) {
1825 /* Get memory bank mapping mode. */
1826 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1827 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1828 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1829
1830 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1831 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1832 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1833
1834 /* Validate settings in case only one DIMM installed. */
1835 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1836 dimm00_addr_map = 0;
1837 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1838 dimm01_addr_map = 0;
1839 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1840 dimm10_addr_map = 0;
1841 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1842 dimm11_addr_map = 0;
1843
1844 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1845 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1846 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1847 adev->gfx.config.mem_row_size_in_kb = 2;
1848 else
1849 adev->gfx.config.mem_row_size_in_kb = 1;
1850 } else {
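		/* dGPU: row size in KB = 4 * 2^(8 + NOOFCOLS) / 1024, capped at 4 KB */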
1851 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1852 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1853 if (adev->gfx.config.mem_row_size_in_kb > 4)
1854 adev->gfx.config.mem_row_size_in_kb = 4;
1855 }
1856
1857 adev->gfx.config.shader_engine_tile_size = 32;
1858 adev->gfx.config.num_gpus = 1;
1859 adev->gfx.config.multi_gpu_tile_size = 64;
1860
1861 /* fix up row size */
1862 switch (adev->gfx.config.mem_row_size_in_kb) {
1863 case 1:
1864 default:
1865 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1866 break;
1867 case 2:
1868 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1869 break;
1870 case 4:
1871 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1872 break;
1873 }
1874 adev->gfx.config.gb_addr_config = gb_addr_config;
1875
1876 return 0;
1877}
1878
1879 static int gfx_v8_0_sw_init(void *handle)
1880{
1881 int i, r;
1882 struct amdgpu_ring *ring;
1883 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1884
1885 /* EOP Event */
1886 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1887 if (r)
1888 return r;
1889
1890 /* Privileged reg */
1891 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1892 if (r)
1893 return r;
1894
1895 /* Privileged inst */
1896 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1897 if (r)
1898 return r;
1899
1900 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1901
1902 gfx_v8_0_scratch_init(adev);
1903
1904 r = gfx_v8_0_init_microcode(adev);
1905 if (r) {
1906 DRM_ERROR("Failed to load gfx firmware!\n");
1907 return r;
1908 }
1909
1910 r = gfx_v8_0_rlc_init(adev);
1911 if (r) {
1912 DRM_ERROR("Failed to init rlc BOs!\n");
1913 return r;
1914 }
1915
1916 r = gfx_v8_0_mec_init(adev);
1917 if (r) {
1918 DRM_ERROR("Failed to init MEC BOs!\n");
1919 return r;
1920 }
1921
1922 /* set up the gfx ring */
1923 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1924 ring = &adev->gfx.gfx_ring[i];
1925 ring->ring_obj = NULL;
1926 sprintf(ring->name, "gfx");
1927 /* no gfx doorbells on iceland */
1928 if (adev->asic_type != CHIP_TOPAZ) {
1929 ring->use_doorbell = true;
1930 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1931 }
1932
1933 r = amdgpu_ring_init(adev, ring, 1024,
1934 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1935 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1936 AMDGPU_RING_TYPE_GFX);
1937 if (r)
1938 return r;
1939 }
1940
1941 /* set up the compute queues */
1942 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1943 unsigned irq_type;
1944
1945 /* max 32 queues per MEC */
1946 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1947 DRM_ERROR("Too many (%d) compute rings!\n", i);
1948 break;
1949 }
1950 ring = &adev->gfx.compute_ring[i];
1951 ring->ring_obj = NULL;
1952 ring->use_doorbell = true;
1953 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
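		/* spread the rings across MEC1: 8 queues per pipe */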
1954 ring->me = 1; /* first MEC */
1955 ring->pipe = i / 8;
1956 ring->queue = i % 8;
1957 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1958 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1959 /* type-2 packets are deprecated on MEC, use type-3 instead */
1960 r = amdgpu_ring_init(adev, ring, 1024,
1961 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1962 &adev->gfx.eop_irq, irq_type,
1963 AMDGPU_RING_TYPE_COMPUTE);
1964 if (r)
1965 return r;
1966 }
1967
1968 /* reserve GDS, GWS and OA resource for gfx */
1969 r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1970 PAGE_SIZE, true,
1971 AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1972 NULL, &adev->gds.gds_gfx_bo);
1973 if (r)
1974 return r;
1975
1976 r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1977 PAGE_SIZE, true,
1978 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1979 NULL, &adev->gds.gws_gfx_bo);
1980 if (r)
1981 return r;
1982
1983 r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1984 PAGE_SIZE, true,
1985 AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1986 NULL, &adev->gds.oa_gfx_bo);
1987 if (r)
1988 return r;
1989
1990 adev->gfx.ce_ram_size = 0x8000;
1991
1992 r = gfx_v8_0_gpu_early_init(adev);
1993 if (r)
1994 return r;
1995
1996 return 0;
1997}
1998
1999 static int gfx_v8_0_sw_fini(void *handle)
2000{
2001 int i;
2002 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2003
2004 amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
2005 amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
2006 amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
2007
2008 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2009 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2010 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2011 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2012
2013 gfx_v8_0_mec_fini(adev);
2014
2015 gfx_v8_0_rlc_fini(adev);
2016
2017 gfx_v8_0_free_microcode(adev);
2018
2019 return 0;
2020}
2021
2022static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2023{
2024 uint32_t *modearray, *mod2array;
2025 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2026 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2027 u32 reg_offset;
2028
2029 modearray = adev->gfx.config.tile_mode_array;
2030 mod2array = adev->gfx.config.macrotile_mode_array;
2031
2032 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2033 modearray[reg_offset] = 0;
2034
2035 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2036 mod2array[reg_offset] = 0;
2037
2038 switch (adev->asic_type) {
2039 case CHIP_TOPAZ:
2040 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2041 PIPE_CONFIG(ADDR_SURF_P2) |
2042 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2043 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2044 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2045 PIPE_CONFIG(ADDR_SURF_P2) |
2046 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2047 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2048 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2049 PIPE_CONFIG(ADDR_SURF_P2) |
2050 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2051 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2052 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2053 PIPE_CONFIG(ADDR_SURF_P2) |
2054 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2055 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2056 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2057 PIPE_CONFIG(ADDR_SURF_P2) |
2058 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2059 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2060 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2061 PIPE_CONFIG(ADDR_SURF_P2) |
2062 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2063 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2064 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2065 PIPE_CONFIG(ADDR_SURF_P2) |
2066 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2067 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2068 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2069 PIPE_CONFIG(ADDR_SURF_P2));
2070 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2071 PIPE_CONFIG(ADDR_SURF_P2) |
2072 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2073 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2074 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2075 PIPE_CONFIG(ADDR_SURF_P2) |
2076 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2077 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2078 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2079 PIPE_CONFIG(ADDR_SURF_P2) |
2080 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2081 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2082 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2083 PIPE_CONFIG(ADDR_SURF_P2) |
2084 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2085 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2086 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2087 PIPE_CONFIG(ADDR_SURF_P2) |
2088 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2089 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2090 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2091 PIPE_CONFIG(ADDR_SURF_P2) |
2092 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2093 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2094 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2095 PIPE_CONFIG(ADDR_SURF_P2) |
2096 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2097 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2098 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2099 PIPE_CONFIG(ADDR_SURF_P2) |
2100 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2101 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2102 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2103 PIPE_CONFIG(ADDR_SURF_P2) |
2104 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2105 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2106 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2107 PIPE_CONFIG(ADDR_SURF_P2) |
2108 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2109 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2110 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2111 PIPE_CONFIG(ADDR_SURF_P2) |
2112 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2113 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2114 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2115 PIPE_CONFIG(ADDR_SURF_P2) |
2116 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2117 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2118 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2119 PIPE_CONFIG(ADDR_SURF_P2) |
2120 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2121 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2122 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2123 PIPE_CONFIG(ADDR_SURF_P2) |
2124 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2125 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2126 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2127 PIPE_CONFIG(ADDR_SURF_P2) |
2128 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2129 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2130 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2131 PIPE_CONFIG(ADDR_SURF_P2) |
2132 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2133 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2134 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135 PIPE_CONFIG(ADDR_SURF_P2) |
2136 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2137 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2138 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2139 PIPE_CONFIG(ADDR_SURF_P2) |
2140 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2141 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2142
2143 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2144 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2145 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2146 NUM_BANKS(ADDR_SURF_8_BANK));
2147 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2148 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2149 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2150 NUM_BANKS(ADDR_SURF_8_BANK));
2151 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2152 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2153 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2154 NUM_BANKS(ADDR_SURF_8_BANK));
2155 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2157 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2158 NUM_BANKS(ADDR_SURF_8_BANK));
2159 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2160 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2161 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2162 NUM_BANKS(ADDR_SURF_8_BANK));
2163 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2164 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2165 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2166 NUM_BANKS(ADDR_SURF_8_BANK));
2167 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170 NUM_BANKS(ADDR_SURF_8_BANK));
2171 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2172 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2173 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2174 NUM_BANKS(ADDR_SURF_16_BANK));
2175 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2176 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2177 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2178 NUM_BANKS(ADDR_SURF_16_BANK));
2179 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2182 NUM_BANKS(ADDR_SURF_16_BANK));
2183 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2184 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2185 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2186 NUM_BANKS(ADDR_SURF_16_BANK));
2187 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2188 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2189 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2190 NUM_BANKS(ADDR_SURF_16_BANK));
2191 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2194 NUM_BANKS(ADDR_SURF_16_BANK));
2195 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2196 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2197 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2198 NUM_BANKS(ADDR_SURF_8_BANK));
2199
2200 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2201 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2202 reg_offset != 23)
2203 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2204
2205 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2206 if (reg_offset != 7)
2207 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2208
2209 break;
2210 case CHIP_FIJI:
2211 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2212 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2213 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2214 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2215 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2216 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2217 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2218 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2219 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2220 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2221 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2222 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2223 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2225 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2226 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2227 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2228 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2229 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2230 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2231 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2232 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2233 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2234 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2235 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2236 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2237 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2238 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2239 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2240 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2241 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2242 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2243 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2244 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2245 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2246 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2247 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2248 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2249 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2250 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2251 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2253 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2254 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2255 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2256 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2257 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2258 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2259 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2260 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2261 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2262 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2263 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2264 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2265 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2266 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2267 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2268 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2269 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2270 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2271 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2272 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2273 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2274 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2275 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2276 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2277 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2278 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2279 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2280 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2281 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2282 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2283 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2284 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2285 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2286 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2287 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2288 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2289 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2290 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2291 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2292 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2293 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2294 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2295 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2296 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2297 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2298 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2299 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2301 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2302 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2303 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2304 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2305 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2306 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2307 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2309 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2310 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2312 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2313 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2314 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2317 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2318 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2325 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2326 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2329 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2330 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2331 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2332 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2333
2334 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2336 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2337 NUM_BANKS(ADDR_SURF_8_BANK));
2338 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2339 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2340 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2341 NUM_BANKS(ADDR_SURF_8_BANK));
2342 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2343 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2344 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2345 NUM_BANKS(ADDR_SURF_8_BANK));
2346 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2347 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2348 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2349 NUM_BANKS(ADDR_SURF_8_BANK));
2350 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2351 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2352 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2353 NUM_BANKS(ADDR_SURF_8_BANK));
2354 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2356 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2357 NUM_BANKS(ADDR_SURF_8_BANK));
2358 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2359 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2360 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2361 NUM_BANKS(ADDR_SURF_8_BANK));
2362 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2363 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2364 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2365 NUM_BANKS(ADDR_SURF_8_BANK));
2366 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2367 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2368 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2369 NUM_BANKS(ADDR_SURF_8_BANK));
2370 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2371 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2372 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2373 NUM_BANKS(ADDR_SURF_8_BANK));
2374 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2376 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2377 NUM_BANKS(ADDR_SURF_8_BANK));
2378 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2380 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2381 NUM_BANKS(ADDR_SURF_8_BANK));
2382 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2383 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2384 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2385 NUM_BANKS(ADDR_SURF_8_BANK));
2386 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2387 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2388 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2389 NUM_BANKS(ADDR_SURF_4_BANK));
2390
2391 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2392 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2393
2394 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2395 if (reg_offset != 7)
2396 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2397
2398 break;
2399 case CHIP_TONGA:
2400 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2401 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2402 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2403 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2404 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2405 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2406 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2407 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2410 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2411 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2412 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2414 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2415 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2416 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2418 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2419 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2421 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2422 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2423 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2426 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2427 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2429 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2430 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2431 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2432 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2433 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2434 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2435 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2436 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2440 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2443 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2444 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2446 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2447 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2448 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2449 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2450 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2451 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2452 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2453 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2455 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2456 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2457 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2459 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2460 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2462 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2463 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2464 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2465 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2466 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2467 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2468 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2470 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2471 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2472 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2473 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2474 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2475 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2476 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2477 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2478 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2479 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2480 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2481 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2482 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2483 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2484 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2485 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2486 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2487 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2488 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2489 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2491 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2492 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2493 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2494 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2495 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2496 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2497 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2498 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2499 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2500 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2501 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2502 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2503 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2504 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2505 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2506 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2507 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2508 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2509 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2510 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2511 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2513 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2514 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2515 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2517 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2518 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2519 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2520 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2521 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2522
2523 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2526 NUM_BANKS(ADDR_SURF_16_BANK));
2527 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2529 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2530 NUM_BANKS(ADDR_SURF_16_BANK));
2531 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2533 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2534 NUM_BANKS(ADDR_SURF_16_BANK));
2535 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2537 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2538 NUM_BANKS(ADDR_SURF_16_BANK));
2539 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2541 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2542 NUM_BANKS(ADDR_SURF_16_BANK));
2543 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2545 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2546 NUM_BANKS(ADDR_SURF_16_BANK));
2547 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2549 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2550 NUM_BANKS(ADDR_SURF_16_BANK));
2551 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2553 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2554 NUM_BANKS(ADDR_SURF_16_BANK));
2555 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2557 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2558 NUM_BANKS(ADDR_SURF_16_BANK));
2559 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2561 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2562 NUM_BANKS(ADDR_SURF_16_BANK));
2563 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2566 NUM_BANKS(ADDR_SURF_16_BANK));
2567 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2568 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2569 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2570 NUM_BANKS(ADDR_SURF_8_BANK));
2571 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2572 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2573 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2574 NUM_BANKS(ADDR_SURF_4_BANK));
2575 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2577 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578 NUM_BANKS(ADDR_SURF_4_BANK));
2579
2580 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2581 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2582
2583 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2584 if (reg_offset != 7)
2585 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2586
2587 break;
2588 case CHIP_POLARIS11:
2589 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2590 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2591 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2592 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2593 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2595 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2596 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2597 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2599 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2600 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2601 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2602 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2603 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2604 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2605 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2607 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2608 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2609 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2610 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2611 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2612 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2613 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2614 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2615 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2616 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2617 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2618 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2619 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2620 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2621 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2622 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2623 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2624 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2625 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2627 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2628 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2629 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2630 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2631 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2632 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2633 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2634 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2635 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2636 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2637 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2638 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2639 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2640 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2641 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2642 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2645 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2646 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2647 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2648 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2649 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2650 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2651 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2652 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2653 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2654 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2655 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2656 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2657 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2658 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2659 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2660 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2661 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2662 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2663 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2664 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2665 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2666 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2667 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2668 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2669 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2670 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2671 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2672 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2673 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2674 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2675 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2676 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2677 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2678 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2679 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2680 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2681 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2682 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2683 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2684 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2685 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2686 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2687 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2688 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2689 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2690 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2691 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2692 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2693 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2694 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2695 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2696 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2698 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2699 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2701 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2702 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2704 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2706 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2707 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2708 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2710 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2711
2712 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2713 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2714 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2715 NUM_BANKS(ADDR_SURF_16_BANK));
2716
2717 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2718 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2719 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2720 NUM_BANKS(ADDR_SURF_16_BANK));
2721
2722 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2724 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2725 NUM_BANKS(ADDR_SURF_16_BANK));
2726
2727 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2729 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2730 NUM_BANKS(ADDR_SURF_16_BANK));
2731
2732 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2734 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2735 NUM_BANKS(ADDR_SURF_16_BANK));
2736
2737 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2738 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2739 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2740 NUM_BANKS(ADDR_SURF_16_BANK));
2741
2742 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2743 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2744 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2745 NUM_BANKS(ADDR_SURF_16_BANK));
2746
2747 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2748 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2749 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2750 NUM_BANKS(ADDR_SURF_16_BANK));
2751
2752 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2753 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2754 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2755 NUM_BANKS(ADDR_SURF_16_BANK));
2756
2757 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2758 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2759 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2760 NUM_BANKS(ADDR_SURF_16_BANK));
2761
2762 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2764 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2765 NUM_BANKS(ADDR_SURF_16_BANK));
2766
2767 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2770 NUM_BANKS(ADDR_SURF_16_BANK));
2771
2772 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2774 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2775 NUM_BANKS(ADDR_SURF_8_BANK));
2776
2777 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2778 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2779 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2780 NUM_BANKS(ADDR_SURF_4_BANK));
2781
2782 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2783 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2784
2785 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2786 if (reg_offset != 7)
2787 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2788
2789 break;
2790 case CHIP_POLARIS10:
2791 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2792 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2793 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2794 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2795 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2797 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2798 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2799 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2800 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2801 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2802 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2803 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2805 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2806 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2807 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2809 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2810 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2811 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2812 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2813 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2814 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2815 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2816 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2817 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2818 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2819 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2822 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2823 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2824 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2825 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2826 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2827 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2828 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2829 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2830 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2831 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2833 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2834 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2835 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2836 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2837 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2838 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2840 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2841 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2842 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2843 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2844 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2845 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2847 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2848 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2849 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2850 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2851 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2852 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2854 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2855 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2856 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2857 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2858 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2860 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2861 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2862 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2863 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2864 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2865 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2866 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2867 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2868 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2869 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2870 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2871 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2872 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2873 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2874 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2875 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2876 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2877 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2878 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2879 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2880 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2881 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2882 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2883 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2884 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2885 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2886 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2887 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2888 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2889 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2890 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2891 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2892 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2893 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2894 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2895 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2896 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2897 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2898 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2899 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2900 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2901 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2903 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2904 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2905 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2906 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2908 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2909 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2910 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2911 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2912 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2913
2914 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2915 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2916 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917 NUM_BANKS(ADDR_SURF_16_BANK));
2918
2919 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2920 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2921 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2922 NUM_BANKS(ADDR_SURF_16_BANK));
2923
2924 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2925 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2926 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2927 NUM_BANKS(ADDR_SURF_16_BANK));
2928
2929 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2930 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2931 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2932 NUM_BANKS(ADDR_SURF_16_BANK));
2933
2934 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2935 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2936 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2937 NUM_BANKS(ADDR_SURF_16_BANK));
2938
2939 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2940 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2941 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2942 NUM_BANKS(ADDR_SURF_16_BANK));
2943
2944 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2945 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2946 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2947 NUM_BANKS(ADDR_SURF_16_BANK));
2948
2949 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2950 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2951 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2952 NUM_BANKS(ADDR_SURF_16_BANK));
2953
2954 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2955 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2956 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2957 NUM_BANKS(ADDR_SURF_16_BANK));
2958
2959 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2960 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2961 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2962 NUM_BANKS(ADDR_SURF_16_BANK));
2963
2964 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2965 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2966 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2967 NUM_BANKS(ADDR_SURF_16_BANK));
2968
2969 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2970 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2971 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2972 NUM_BANKS(ADDR_SURF_8_BANK));
2973
2974 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2976 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2977 NUM_BANKS(ADDR_SURF_4_BANK));
2978
2979 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2980 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2981 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2982 NUM_BANKS(ADDR_SURF_4_BANK));
2983
2984 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2985 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2986
2987 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2988 if (reg_offset != 7)
2989 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2990
aaa36a97 2991 break;
e3c7656c 2992 case CHIP_STONEY:
90bea0ab
TSD
2993 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2994 PIPE_CONFIG(ADDR_SURF_P2) |
2995 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2996 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2997 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998 PIPE_CONFIG(ADDR_SURF_P2) |
2999 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3000 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3001 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3002 PIPE_CONFIG(ADDR_SURF_P2) |
3003 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3004 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3005 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006 PIPE_CONFIG(ADDR_SURF_P2) |
3007 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3008 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3009 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3010 PIPE_CONFIG(ADDR_SURF_P2) |
3011 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3012 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3013 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3014 PIPE_CONFIG(ADDR_SURF_P2) |
3015 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3016 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3017 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3018 PIPE_CONFIG(ADDR_SURF_P2) |
3019 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3020 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3021 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3022 PIPE_CONFIG(ADDR_SURF_P2));
3023 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3024 PIPE_CONFIG(ADDR_SURF_P2) |
3025 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3026 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3027 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3028 PIPE_CONFIG(ADDR_SURF_P2) |
3029 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3030 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3031 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3032 PIPE_CONFIG(ADDR_SURF_P2) |
3033 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3034 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3035 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3036 PIPE_CONFIG(ADDR_SURF_P2) |
3037 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3038 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3039 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3040 PIPE_CONFIG(ADDR_SURF_P2) |
3041 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3042 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3043 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3044 PIPE_CONFIG(ADDR_SURF_P2) |
3045 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3046 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3047 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3048 PIPE_CONFIG(ADDR_SURF_P2) |
3049 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3050 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3051 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3052 PIPE_CONFIG(ADDR_SURF_P2) |
3053 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3054 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3055 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3056 PIPE_CONFIG(ADDR_SURF_P2) |
3057 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3058 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3059 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3060 PIPE_CONFIG(ADDR_SURF_P2) |
3061 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3062 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3063 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3064 PIPE_CONFIG(ADDR_SURF_P2) |
3065 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3066 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3067 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3068 PIPE_CONFIG(ADDR_SURF_P2) |
3069 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3071 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3072 PIPE_CONFIG(ADDR_SURF_P2) |
3073 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3074 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3075 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3076 PIPE_CONFIG(ADDR_SURF_P2) |
3077 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3079 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3080 PIPE_CONFIG(ADDR_SURF_P2) |
3081 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3082 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3083 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3084 PIPE_CONFIG(ADDR_SURF_P2) |
3085 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3086 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3087 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3088 PIPE_CONFIG(ADDR_SURF_P2) |
3089 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3090 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3091 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3092 PIPE_CONFIG(ADDR_SURF_P2) |
3093 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3094 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3095
3096 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3097 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3098 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3099 NUM_BANKS(ADDR_SURF_8_BANK));
3100 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3101 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3102 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3103 NUM_BANKS(ADDR_SURF_8_BANK));
3104 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3105 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3106 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3107 NUM_BANKS(ADDR_SURF_8_BANK));
3108 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3109 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3110 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3111 NUM_BANKS(ADDR_SURF_8_BANK));
3112 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3113 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3114 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3115 NUM_BANKS(ADDR_SURF_8_BANK));
3116 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3117 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3118 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3119 NUM_BANKS(ADDR_SURF_8_BANK));
3120 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3121 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3122 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3123 NUM_BANKS(ADDR_SURF_8_BANK));
3124 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3125 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3126 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3127 NUM_BANKS(ADDR_SURF_16_BANK));
3128 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3129 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3130 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3131 NUM_BANKS(ADDR_SURF_16_BANK));
3132 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3133 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3134 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3135 NUM_BANKS(ADDR_SURF_16_BANK));
3136 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3137 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3138 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3139 NUM_BANKS(ADDR_SURF_16_BANK));
3140 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3141 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3142 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3143 NUM_BANKS(ADDR_SURF_16_BANK));
3144 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3145 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3146 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3147 NUM_BANKS(ADDR_SURF_16_BANK));
3148 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3149 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3150 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3151 NUM_BANKS(ADDR_SURF_8_BANK));
3152
3153 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3154 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3155 reg_offset != 23)
3156 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3157
3158 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3159 if (reg_offset != 7)
3160 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3161
e3c7656c 3162 break;
aaa36a97 3163 default:
90bea0ab
TSD
3164 dev_warn(adev->dev,
3165 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3166 adev->asic_type);
3167
3168 case CHIP_CARRIZO:
3169 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3170 PIPE_CONFIG(ADDR_SURF_P2) |
3171 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3172 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3173 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3174 PIPE_CONFIG(ADDR_SURF_P2) |
3175 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3176 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3177 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3178 PIPE_CONFIG(ADDR_SURF_P2) |
3179 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3180 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3181 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3182 PIPE_CONFIG(ADDR_SURF_P2) |
3183 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3184 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3185 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3186 PIPE_CONFIG(ADDR_SURF_P2) |
3187 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3188 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3189 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3190 PIPE_CONFIG(ADDR_SURF_P2) |
3191 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3192 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3193 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3194 PIPE_CONFIG(ADDR_SURF_P2) |
3195 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3196 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3197 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3198 PIPE_CONFIG(ADDR_SURF_P2));
3199 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3200 PIPE_CONFIG(ADDR_SURF_P2) |
3201 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3202 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3203 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3204 PIPE_CONFIG(ADDR_SURF_P2) |
3205 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3206 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3207 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3208 PIPE_CONFIG(ADDR_SURF_P2) |
3209 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3210 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3211 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3212 PIPE_CONFIG(ADDR_SURF_P2) |
3213 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3214 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3215 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3216 PIPE_CONFIG(ADDR_SURF_P2) |
3217 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3219 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3220 PIPE_CONFIG(ADDR_SURF_P2) |
3221 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3222 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3223 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3224 PIPE_CONFIG(ADDR_SURF_P2) |
3225 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3226 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3227 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3228 PIPE_CONFIG(ADDR_SURF_P2) |
3229 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3231 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3232 PIPE_CONFIG(ADDR_SURF_P2) |
3233 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3234 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3235 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3236 PIPE_CONFIG(ADDR_SURF_P2) |
3237 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3238 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3239 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3240 PIPE_CONFIG(ADDR_SURF_P2) |
3241 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3242 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3243 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3244 PIPE_CONFIG(ADDR_SURF_P2) |
3245 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3247 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3248 PIPE_CONFIG(ADDR_SURF_P2) |
3249 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3251 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3252 PIPE_CONFIG(ADDR_SURF_P2) |
3253 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3254 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3255 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3256 PIPE_CONFIG(ADDR_SURF_P2) |
3257 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3259 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3260 PIPE_CONFIG(ADDR_SURF_P2) |
3261 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3263 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3264 PIPE_CONFIG(ADDR_SURF_P2) |
3265 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3267 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3268 PIPE_CONFIG(ADDR_SURF_P2) |
3269 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3271
3272 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3275 NUM_BANKS(ADDR_SURF_8_BANK));
3276 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3277 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3278 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3279 NUM_BANKS(ADDR_SURF_8_BANK));
3280 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3281 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3282 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3283 NUM_BANKS(ADDR_SURF_8_BANK));
3284 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3285 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3286 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3287 NUM_BANKS(ADDR_SURF_8_BANK));
3288 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3289 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3290 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3291 NUM_BANKS(ADDR_SURF_8_BANK));
3292 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3293 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3294 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3295 NUM_BANKS(ADDR_SURF_8_BANK));
3296 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3297 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3298 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3299 NUM_BANKS(ADDR_SURF_8_BANK));
3300 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3301 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3302 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303 NUM_BANKS(ADDR_SURF_16_BANK));
3304 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3305 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3306 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 NUM_BANKS(ADDR_SURF_16_BANK));
3308 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3309 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3310 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3311 NUM_BANKS(ADDR_SURF_16_BANK));
3312 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3313 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3314 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3315 NUM_BANKS(ADDR_SURF_16_BANK));
3316 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3318 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319 NUM_BANKS(ADDR_SURF_16_BANK));
3320 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3321 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3322 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3323 NUM_BANKS(ADDR_SURF_16_BANK));
3324 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327 NUM_BANKS(ADDR_SURF_8_BANK));
3328
3329 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3330 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3331 reg_offset != 23)
3332 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3333
3334 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3335 if (reg_offset != 7)
3336 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3337
3338 break;
aaa36a97
AD
3339 }
3340}
3341
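/*
 * Select the shader engine (se_num) and shader array (sh_num) that the
 * following GRBM-indexed register accesses will target.  Passing
 * 0xffffffff for either argument sets the corresponding broadcast bit
 * in GRBM_GFX_INDEX instead of a specific index.
 */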
aaa36a97
AD
3342void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
3343{
3344 u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3345
3346 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3347 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3348 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3349 } else if (se_num == 0xffffffff) {
3350 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3351 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3352 } else if (sh_num == 0xffffffff) {
3353 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3354 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3355 } else {
3356 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3357 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3358 }
3359 WREG32(mmGRBM_GFX_INDEX, data);
3360}
3361
8f8e00c1
AD
3362static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3363{
3364 return (u32)((1ULL << bit_width) - 1);
3365}
3366
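/*
 * Return a bitmap of the render backends (RBs) active for the currently
 * selected SE/SH: the disable bits from CC_RB_BACKEND_DISABLE and
 * GC_USER_RB_BACKEND_DISABLE are OR'ed together, inverted, and masked
 * down to the number of backends per shader array.
 */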
3367static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
aaa36a97
AD
3368{
3369 u32 data, mask;
3370
3371 data = RREG32(mmCC_RB_BACKEND_DISABLE);
aaa36a97
AD
3372 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3373
8f8e00c1 3374 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
aaa36a97
AD
3375 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3376
8f8e00c1
AD
3377 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3378 adev->gfx.config.max_sh_per_se);
aaa36a97 3379
8f8e00c1 3380 return (~data) & mask;
aaa36a97
AD
3381}
3382
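/*
 * Walk every SE/SH pair, gather the per-array active-RB bitmaps into a
 * single mask, and cache the result (plus the popcount of enabled RBs)
 * in adev->gfx.config for later use.
 */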
8f8e00c1 3383static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
aaa36a97
AD
3384{
3385 int i, j;
aac1e3ca 3386 u32 data;
8f8e00c1 3387 u32 active_rbs = 0;
6157bd7a
FC
3388 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3389 adev->gfx.config.max_sh_per_se;
aaa36a97
AD
3390
3391 mutex_lock(&adev->grbm_idx_mutex);
8f8e00c1
AD
3392 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3393 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
aaa36a97 3394 gfx_v8_0_select_se_sh(adev, i, j);
8f8e00c1
AD
3395 data = gfx_v8_0_get_rb_active_bitmap(adev);
3396 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
6157bd7a 3397 rb_bitmap_width_per_sh);
aaa36a97
AD
3398 }
3399 }
3400 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3401 mutex_unlock(&adev->grbm_idx_mutex);
3402
8f8e00c1 3403 adev->gfx.config.backend_enable_mask = active_rbs;
aac1e3ca 3404 adev->gfx.config.num_rbs = hweight32(active_rbs);
aaa36a97
AD
3405}
3406
cd06bf68 3407/**
35c7a952 3408 * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
cd06bf68
BG
3409 *
3410 * @adev: amdgpu_device pointer
3411 *
3412 * Initialize compute vmid sh_mem registers
3413 *
3414 */
3415#define DEFAULT_SH_MEM_BASES (0x6000)
3416#define FIRST_COMPUTE_VMID (8)
3417#define LAST_COMPUTE_VMID (16)
35c7a952 3418static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
cd06bf68
BG
3419{
3420 int i;
3421 uint32_t sh_mem_config;
3422 uint32_t sh_mem_bases;
3423
3424 /*
3425 * Configure apertures:
3426 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3427 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3428 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3429 */
3430 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3431
3432 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3433 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3434 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3435 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3436 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3437 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3438
3439 mutex_lock(&adev->srbm_mutex);
3440 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3441 vi_srbm_select(adev, 0, 0, 0, i);
3442 /* CP and shaders */
3443 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3444 WREG32(mmSH_MEM_APE1_BASE, 1);
3445 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3446 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3447 }
3448 vi_srbm_select(adev, 0, 0, 0, 0);
3449 mutex_unlock(&adev->srbm_mutex);
3450}
3451
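/*
 * One-time setup of the gfx block: program the GRBM read timeout and
 * address config, load the tiling tables, collect RB/CU info, set up
 * the per-VMID SH_MEM apertures and program the PA_SC FIFO sizes.
 */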
aaa36a97
AD
3452static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3453{
aaa36a97
AD
3454 u32 tmp;
3455 int i;
3456
aaa36a97
AD
3457 tmp = RREG32(mmGRBM_CNTL);
3458 tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3459 WREG32(mmGRBM_CNTL, tmp);
3460
0bde3a95
AD
3461 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3462 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3463 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
aaa36a97
AD
3464
3465 gfx_v8_0_tiling_mode_table_init(adev);
3466
8f8e00c1 3467 gfx_v8_0_setup_rb(adev);
7dae69a2 3468 gfx_v8_0_get_cu_info(adev);
aaa36a97
AD
3469
3470 /* XXX SH_MEM regs */
3471 /* where to put LDS, scratch, GPUVM in FSA64 space */
3472 mutex_lock(&adev->srbm_mutex);
3473 for (i = 0; i < 16; i++) {
3474 vi_srbm_select(adev, 0, 0, 0, i);
3475 /* CP and shaders */
3476 if (i == 0) {
3477 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3478 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
0bde3a95 3479 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
74a5d165 3480 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
aaa36a97
AD
3481 WREG32(mmSH_MEM_CONFIG, tmp);
3482 } else {
3483 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3484 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
0bde3a95 3485 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
74a5d165 3486 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
aaa36a97
AD
3487 WREG32(mmSH_MEM_CONFIG, tmp);
3488 }
3489
3490 WREG32(mmSH_MEM_APE1_BASE, 1);
3491 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3492 WREG32(mmSH_MEM_BASES, 0);
3493 }
3494 vi_srbm_select(adev, 0, 0, 0, 0);
3495 mutex_unlock(&adev->srbm_mutex);
3496
35c7a952 3497 gfx_v8_0_init_compute_vmid(adev);
cd06bf68 3498
aaa36a97
AD
3499 mutex_lock(&adev->grbm_idx_mutex);
3500 /*
3501 * make sure that the following register writes will be broadcast
3502 * to all the shaders
3503 */
3504 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3505
3506 WREG32(mmPA_SC_FIFO_SIZE,
3507 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3508 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3509 (adev->gfx.config.sc_prim_fifo_size_backend <<
3510 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3511 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3512 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3513 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3514 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3515 mutex_unlock(&adev->grbm_idx_mutex);
3516
3517}
3518
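/*
 * Poll (for up to adev->usec_timeout microseconds per target) until the
 * RLC serdes CU masters of every SE/SH, and then the non-CU masters,
 * report idle.
 */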
3519static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3520{
3521 u32 i, j, k;
3522 u32 mask;
3523
3524 mutex_lock(&adev->grbm_idx_mutex);
3525 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3526 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3527 gfx_v8_0_select_se_sh(adev, i, j);
3528 for (k = 0; k < adev->usec_timeout; k++) {
3529 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3530 break;
3531 udelay(1);
3532 }
3533 }
3534 }
3535 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3536 mutex_unlock(&adev->grbm_idx_mutex);
3537
3538 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3539 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3540 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3541 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3542 for (k = 0; k < adev->usec_timeout; k++) {
3543 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3544 break;
3545 udelay(1);
3546 }
3547}
3548
3549static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3550 bool enable)
3551{
3552 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3553
0d07db7e
TSD
3554 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3555 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3556 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3557 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3558
aaa36a97
AD
3559 WREG32(mmCP_INT_CNTL_RING0, tmp);
3560}
3561
2b6cd977
EH
3562static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3563{
3564 /* csib */
3565 WREG32(mmRLC_CSIB_ADDR_HI,
3566 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3567 WREG32(mmRLC_CSIB_ADDR_LO,
3568 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3569 WREG32(mmRLC_CSIB_LENGTH,
3570 adev->gfx.rlc.clear_state_size);
3571}
3572
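/*
 * Walk the RLC register_list_format blob: remember the offset at which
 * each indirect entry starts, deduplicate the index dwords found inside
 * the entries (entries are terminated by 0xFFFFFFFF) into
 * unique_indices, and rewrite each index dword in place with its slot
 * in that table.
 */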
3573static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3574 int ind_offset,
3575 int list_size,
3576 int *unique_indices,
3577 int *indices_count,
3578 int max_indices,
3579 int *ind_start_offsets,
3580 int *offset_count,
3581 int max_offset)
3582{
3583 int indices;
3584 bool new_entry = true;
3585
3586 for (; ind_offset < list_size; ind_offset++) {
3587
3588 if (new_entry) {
3589 new_entry = false;
3590 ind_start_offsets[*offset_count] = ind_offset;
3591 *offset_count = *offset_count + 1;
3592 BUG_ON(*offset_count >= max_offset);
3593 }
3594
3595 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3596 new_entry = true;
3597 continue;
3598 }
3599
3600 ind_offset += 2;
3601
3602 /* look for the matching index */
3603 for (indices = 0;
3604 indices < *indices_count;
3605 indices++) {
3606 if (unique_indices[indices] ==
3607 register_list_format[ind_offset])
3608 break;
3609 }
3610
3611 if (indices >= *indices_count) {
3612 unique_indices[*indices_count] =
3613 register_list_format[ind_offset];
3614 indices = *indices_count;
3615 *indices_count = *indices_count + 1;
3616 BUG_ON(*indices_count >= max_indices);
3617 }
3618
3619 register_list_format[ind_offset] = indices;
3620 }
3621}
3622
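/*
 * Program the RLC save/restore machinery from the firmware-provided
 * lists: the direct register_restore list goes into SRM ARAM, the
 * (parsed) indirect format list and its starting offsets go into GPM
 * scratch, and the unique index values are split across the
 * RLC_SRM_INDEX_CNTL address/data register pairs.
 */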
3623static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3624{
3625 int i, temp, data;
3626 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3627 int indices_count = 0;
3628 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3629 int offset_count = 0;
3630
3631 int list_size;
3632 unsigned int *register_list_format =
3633 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3634 if (register_list_format == NULL)
3635 return -ENOMEM;
3636 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3637 adev->gfx.rlc.reg_list_format_size_bytes);
3638
3639 gfx_v8_0_parse_ind_reg_list(register_list_format,
3640 RLC_FormatDirectRegListLength,
3641 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3642 unique_indices,
3643 &indices_count,
3644 sizeof(unique_indices) / sizeof(int),
3645 indirect_start_offsets,
3646 &offset_count,
3647 sizeof(indirect_start_offsets)/sizeof(int));
3648
3649 /* save and restore list */
3650 temp = RREG32(mmRLC_SRM_CNTL);
3651 temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3652 WREG32(mmRLC_SRM_CNTL, temp);
3653
3654 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3655 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3656 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3657
3658 /* indirect list */
3659 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3660 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3661 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3662
3663 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3664 list_size = list_size >> 1;
3665 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3666 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3667
3668 /* starting offsets */
3669 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3670 adev->gfx.rlc.starting_offsets_start);
3671 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3672 WREG32(mmRLC_GPM_SCRATCH_DATA,
3673 indirect_start_offsets[i]);
3674
3675 /* unique indices */
3676 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3677 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3678 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3679 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3680 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3681 }
3682 kfree(register_list_format);
3683
3684 return 0;
3685}
3686
3687static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3688{
3689 uint32_t data;
3690
3691 data = RREG32(mmRLC_SRM_CNTL);
3692 data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3693 WREG32(mmRLC_SRM_CNTL, data);
3694}
3695
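/*
 * Polaris11-specific power gating setup: when GFX PG/SMG/DMG is
 * supported, program the RB WPTR idle poll count and the RLC
 * power-gating delay and GFX-idle-save-threshold fields.
 */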
2cc0c0b5 3696static void polaris11_init_power_gating(struct amdgpu_device *adev)
f4bfffdd
EH
3697{
3698 uint32_t data;
3699
3700 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3701 AMD_PG_SUPPORT_GFX_SMG |
3702 AMD_PG_SUPPORT_GFX_DMG)) {
3703 data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3704 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3705 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3706 WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3707
3708 data = 0;
3709 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3710 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3711 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3712 data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3713 WREG32(mmRLC_PG_DELAY, data);
3714
3715 data = RREG32(mmRLC_PG_DELAY_2);
3716 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3717 data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3718 WREG32(mmRLC_PG_DELAY_2, data);
3719
3720 data = RREG32(mmRLC_AUTO_PG_CTRL);
3721 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3722 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3723 WREG32(mmRLC_AUTO_PG_CTRL, data);
3724 }
3725}
3726
2b6cd977
EH
3727static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3728{
3729 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3730 AMD_PG_SUPPORT_GFX_SMG |
3731 AMD_PG_SUPPORT_GFX_DMG |
3732 AMD_PG_SUPPORT_CP |
3733 AMD_PG_SUPPORT_GDS |
3734 AMD_PG_SUPPORT_RLC_SMU_HS)) {
3735 gfx_v8_0_init_csb(adev);
3736 gfx_v8_0_init_save_restore_list(adev);
3737 gfx_v8_0_enable_save_restore_machine(adev);
f4bfffdd 3738
2cc0c0b5
FC
3739 if (adev->asic_type == CHIP_POLARIS11)
3740 polaris11_init_power_gating(adev);
2b6cd977
EH
3741 }
3742}
3743
aaa36a97
AD
3744void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3745{
3746 u32 tmp = RREG32(mmRLC_CNTL);
3747
3748 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3749 WREG32(mmRLC_CNTL, tmp);
3750
3751 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3752
3753 gfx_v8_0_wait_for_rlc_serdes(adev);
3754}
3755
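/*
 * Pulse the RLC bit in GRBM_SOFT_RESET: assert it, wait ~50us, then
 * deassert it and wait another ~50us.
 */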
3756static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3757{
3758 u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3759
3760 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3761 WREG32(mmGRBM_SOFT_RESET, tmp);
3762 udelay(50);
3763 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3764 WREG32(mmGRBM_SOFT_RESET, tmp);
3765 udelay(50);
3766}
3767
3768static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3769{
3770 u32 tmp = RREG32(mmRLC_CNTL);
3771
3772 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3773 WREG32(mmRLC_CNTL, tmp);
3774
3775 /* carrizo enables the cp interrupt after cp is initialized */
e3c7656c 3776 if (!(adev->flags & AMD_IS_APU))
aaa36a97
AD
3777 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3778
3779 udelay(50);
3780}
3781
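/*
 * Legacy (non-SMU) RLC microcode load: parse the firmware header, then
 * stream the ucode words through RLC_GPM_UCODE_ADDR/DATA and finish by
 * writing the firmware version to the ADDR register.
 */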
3782static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3783{
3784 const struct rlc_firmware_header_v2_0 *hdr;
3785 const __le32 *fw_data;
3786 unsigned i, fw_size;
3787
3788 if (!adev->gfx.rlc_fw)
3789 return -EINVAL;
3790
3791 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3792 amdgpu_ucode_print_rlc_hdr(&hdr->header);
aaa36a97
AD
3793
3794 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3795 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3796 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3797
3798 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3799 for (i = 0; i < fw_size; i++)
3800 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3801 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3802
3803 return 0;
3804}
3805
3806static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3807{
3808 int r;
3809
3810 gfx_v8_0_rlc_stop(adev);
3811
3812 /* disable CG */
3813 WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
2cc0c0b5
FC
3814 if (adev->asic_type == CHIP_POLARIS11 ||
3815 adev->asic_type == CHIP_POLARIS10)
68182d90 3816 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
aaa36a97
AD
3817
3818 /* disable PG */
3819 WREG32(mmRLC_PG_CNTL, 0);
3820
3821 gfx_v8_0_rlc_reset(adev);
3822
2b6cd977
EH
3823 gfx_v8_0_init_pg(adev);
3824
e61710c5 3825 if (!adev->pp_enabled) {
ba5c2a87
RZ
3826 if (!adev->firmware.smu_load) {
3827 /* legacy rlc firmware loading */
3828 r = gfx_v8_0_rlc_load_microcode(adev);
3829 if (r)
3830 return r;
3831 } else {
3832 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3833 AMDGPU_UCODE_ID_RLC_G);
3834 if (r)
3835 return -EINVAL;
3836 }
aaa36a97
AD
3837 }
3838
3839 gfx_v8_0_rlc_start(adev);
3840
3841 return 0;
3842}
3843
3844static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3845{
3846 int i;
3847 u32 tmp = RREG32(mmCP_ME_CNTL);
3848
3849 if (enable) {
3850 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
3851 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
3852 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
3853 } else {
3854 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
3855 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
3856 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
3857 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3858 adev->gfx.gfx_ring[i].ready = false;
3859 }
3860 WREG32(mmCP_ME_CNTL, tmp);
3861 udelay(50);
3862}
3863
3864static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3865{
3866 const struct gfx_firmware_header_v1_0 *pfp_hdr;
3867 const struct gfx_firmware_header_v1_0 *ce_hdr;
3868 const struct gfx_firmware_header_v1_0 *me_hdr;
3869 const __le32 *fw_data;
3870 unsigned i, fw_size;
3871
3872 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3873 return -EINVAL;
3874
3875 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3876 adev->gfx.pfp_fw->data;
3877 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3878 adev->gfx.ce_fw->data;
3879 me_hdr = (const struct gfx_firmware_header_v1_0 *)
3880 adev->gfx.me_fw->data;
3881
3882 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3883 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3884 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
aaa36a97
AD
3885
3886 gfx_v8_0_cp_gfx_enable(adev, false);
3887
3888 /* PFP */
3889 fw_data = (const __le32 *)
3890 (adev->gfx.pfp_fw->data +
3891 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3892 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3893 WREG32(mmCP_PFP_UCODE_ADDR, 0);
3894 for (i = 0; i < fw_size; i++)
3895 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3896 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3897
3898 /* CE */
3899 fw_data = (const __le32 *)
3900 (adev->gfx.ce_fw->data +
3901 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3902 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3903 WREG32(mmCP_CE_UCODE_ADDR, 0);
3904 for (i = 0; i < fw_size; i++)
3905 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3906 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3907
3908 /* ME */
3909 fw_data = (const __le32 *)
3910 (adev->gfx.me_fw->data +
3911 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3912 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3913 WREG32(mmCP_ME_RAM_WADDR, 0);
3914 for (i = 0; i < fw_size; i++)
3915 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3916 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3917
3918 return 0;
3919}
3920
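/*
 * Compute how many dwords gfx_v8_0_cp_gfx_start() will emit for the
 * clear-state sequence: preamble and context control, every
 * SECT_CONTEXT extent from vi_cs_data, the PA_SC raster config pair and
 * the trailing end/clear-state packets.
 */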
3921static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3922{
3923 u32 count = 0;
3924 const struct cs_section_def *sect = NULL;
3925 const struct cs_extent_def *ext = NULL;
3926
3927 /* begin clear state */
3928 count += 2;
3929 /* context control state */
3930 count += 3;
3931
3932 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3933 for (ext = sect->section; ext->extent != NULL; ++ext) {
3934 if (sect->id == SECT_CONTEXT)
3935 count += 2 + ext->reg_count;
3936 else
3937 return 0;
3938 }
3939 }
3940 /* pa_sc_raster_config/pa_sc_raster_config1 */
3941 count += 4;
3942 /* end clear state */
3943 count += 2;
3944 /* clear state */
3945 count += 2;
3946
3947 return count;
3948}
3949
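/*
 * Prime the gfx CP: program the context/device registers, then submit a
 * batch of ring packets that loads the clear state (preamble, context
 * control, the vi_cs_data context registers, the per-ASIC PA_SC raster
 * config values) and initializes the CE partition bases.
 */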
3950static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3951{
3952 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3953 const struct cs_section_def *sect = NULL;
3954 const struct cs_extent_def *ext = NULL;
3955 int r, i;
3956
3957 /* init the CP */
3958 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3959 WREG32(mmCP_ENDIAN_SWAP, 0);
3960 WREG32(mmCP_DEVICE_ID, 1);
3961
3962 gfx_v8_0_cp_gfx_enable(adev, true);
3963
a27de35c 3964 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
aaa36a97
AD
3965 if (r) {
3966 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3967 return r;
3968 }
3969
3970 /* clear state buffer */
3971 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3972 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3973
3974 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3975 amdgpu_ring_write(ring, 0x80000000);
3976 amdgpu_ring_write(ring, 0x80000000);
3977
3978 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3979 for (ext = sect->section; ext->extent != NULL; ++ext) {
3980 if (sect->id == SECT_CONTEXT) {
3981 amdgpu_ring_write(ring,
3982 PACKET3(PACKET3_SET_CONTEXT_REG,
3983 ext->reg_count));
3984 amdgpu_ring_write(ring,
3985 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3986 for (i = 0; i < ext->reg_count; i++)
3987 amdgpu_ring_write(ring, ext->extent[i]);
3988 }
3989 }
3990 }
3991
3992 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3993 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3994 switch (adev->asic_type) {
3995 case CHIP_TONGA:
2cc0c0b5 3996 case CHIP_POLARIS10:
aaa36a97
AD
3997 amdgpu_ring_write(ring, 0x16000012);
3998 amdgpu_ring_write(ring, 0x0000002A);
3999 break;
2cc0c0b5 4000 case CHIP_POLARIS11:
68182d90
FC
4001 amdgpu_ring_write(ring, 0x16000012);
4002 amdgpu_ring_write(ring, 0x00000000);
4003 break;
fa676048
FC
4004 case CHIP_FIJI:
4005 amdgpu_ring_write(ring, 0x3a00161a);
4006 amdgpu_ring_write(ring, 0x0000002e);
4007 break;
aaa36a97
AD
4008 case CHIP_CARRIZO:
4009 amdgpu_ring_write(ring, 0x00000002);
4010 amdgpu_ring_write(ring, 0x00000000);
4011 break;
d1a7f7aa
KW
4012 case CHIP_TOPAZ:
4013 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4014 0x00000000 : 0x00000002);
4015 amdgpu_ring_write(ring, 0x00000000);
4016 break;
e3c7656c
SL
4017 case CHIP_STONEY:
4018 amdgpu_ring_write(ring, 0x00000000);
4019 amdgpu_ring_write(ring, 0x00000000);
4020 break;
aaa36a97
AD
4021 default:
4022 BUG();
4023 }
4024
4025 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4026 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4027
4028 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4029 amdgpu_ring_write(ring, 0);
4030
4031 /* init the CE partitions */
4032 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4033 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4034 amdgpu_ring_write(ring, 0x8000);
4035 amdgpu_ring_write(ring, 0x8000);
4036
a27de35c 4037 amdgpu_ring_commit(ring);
aaa36a97
AD
4038
4039 return 0;
4040}
4041
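/*
 * Bring up gfx ring 0: size and program the ring buffer registers,
 * point the hardware at the rptr writeback slot, set up doorbells on
 * ASICs that have them, then run gfx_v8_0_cp_gfx_start() and a ring
 * test before marking the ring ready.
 */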
4042static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4043{
4044 struct amdgpu_ring *ring;
4045 u32 tmp;
4046 u32 rb_bufsz;
4047 u64 rb_addr, rptr_addr;
4048 int r;
4049
4050 /* Set the write pointer delay */
4051 WREG32(mmCP_RB_WPTR_DELAY, 0);
4052
4053 /* set the RB to use vmid 0 */
4054 WREG32(mmCP_RB_VMID, 0);
4055
4056 /* Set ring buffer size */
4057 ring = &adev->gfx.gfx_ring[0];
4058 rb_bufsz = order_base_2(ring->ring_size / 8);
4059 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4060 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4061 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4062 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4063#ifdef __BIG_ENDIAN
4064 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4065#endif
4066 WREG32(mmCP_RB0_CNTL, tmp);
4067
4068 /* Initialize the ring buffer's read and write pointers */
4069 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4070 ring->wptr = 0;
4071 WREG32(mmCP_RB0_WPTR, ring->wptr);
4072
4073 /* set the wb address whether it's enabled or not */
4074 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4075 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4076 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4077
4078 mdelay(1);
4079 WREG32(mmCP_RB0_CNTL, tmp);
4080
4081 rb_addr = ring->gpu_addr >> 8;
4082 WREG32(mmCP_RB0_BASE, rb_addr);
4083 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4084
4085 /* no gfx doorbells on iceland */
4086 if (adev->asic_type != CHIP_TOPAZ) {
4087 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4088 if (ring->use_doorbell) {
4089 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4090 DOORBELL_OFFSET, ring->doorbell_index);
68182d90
FC
4091 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4092 DOORBELL_HIT, 0);
aaa36a97
AD
4093 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4094 DOORBELL_EN, 1);
4095 } else {
4096 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4097 DOORBELL_EN, 0);
4098 }
4099 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4100
4101 if (adev->asic_type == CHIP_TONGA) {
4102 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4103 DOORBELL_RANGE_LOWER,
4104 AMDGPU_DOORBELL_GFX_RING0);
4105 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4106
4107 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4108 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4109 }
4110
4111 }
4112
4113 /* start the ring */
4114 gfx_v8_0_cp_gfx_start(adev);
4115 ring->ready = true;
4116 r = amdgpu_ring_test_ring(ring);
4117 if (r) {
4118 ring->ready = false;
4119 return r;
4120 }
4121
4122 return 0;
4123}
4124
4125static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4126{
4127 int i;
4128
4129 if (enable) {
4130 WREG32(mmCP_MEC_CNTL, 0);
4131 } else {
4132 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4133 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4134 adev->gfx.compute_ring[i].ready = false;
4135 }
4136 udelay(50);
4137}
4138
aaa36a97
AD
4139static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4140{
4141 const struct gfx_firmware_header_v1_0 *mec_hdr;
4142 const __le32 *fw_data;
4143 unsigned i, fw_size;
4144
4145 if (!adev->gfx.mec_fw)
4146 return -EINVAL;
4147
4148 gfx_v8_0_cp_compute_enable(adev, false);
4149
4150 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4151 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
aaa36a97
AD
4152
4153 fw_data = (const __le32 *)
4154 (adev->gfx.mec_fw->data +
4155 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4156 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4157
4158 /* MEC1 */
4159 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4160 for (i = 0; i < fw_size; i++)
4161 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4162 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4163
4164 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4165 if (adev->gfx.mec2_fw) {
4166 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4167
4168 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4169 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
aaa36a97
AD
4170
4171 fw_data = (const __le32 *)
4172 (adev->gfx.mec2_fw->data +
4173 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4174 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4175
4176 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4177 for (i = 0; i < fw_size; i++)
4178 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4179 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4180 }
4181
4182 return 0;
4183}
4184
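/*
 * Memory queue descriptor (MQD) layout used when initializing compute
 * (HQD) queues on VI; the ordinal comments give each field's dword
 * position within the descriptor.
 */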
4185struct vi_mqd {
4186 uint32_t header; /* ordinal0 */
4187 uint32_t compute_dispatch_initiator; /* ordinal1 */
4188 uint32_t compute_dim_x; /* ordinal2 */
4189 uint32_t compute_dim_y; /* ordinal3 */
4190 uint32_t compute_dim_z; /* ordinal4 */
4191 uint32_t compute_start_x; /* ordinal5 */
4192 uint32_t compute_start_y; /* ordinal6 */
4193 uint32_t compute_start_z; /* ordinal7 */
4194 uint32_t compute_num_thread_x; /* ordinal8 */
4195 uint32_t compute_num_thread_y; /* ordinal9 */
4196 uint32_t compute_num_thread_z; /* ordinal10 */
4197 uint32_t compute_pipelinestat_enable; /* ordinal11 */
4198 uint32_t compute_perfcount_enable; /* ordinal12 */
4199 uint32_t compute_pgm_lo; /* ordinal13 */
4200 uint32_t compute_pgm_hi; /* ordinal14 */
4201 uint32_t compute_tba_lo; /* ordinal15 */
4202 uint32_t compute_tba_hi; /* ordinal16 */
4203 uint32_t compute_tma_lo; /* ordinal17 */
4204 uint32_t compute_tma_hi; /* ordinal18 */
4205 uint32_t compute_pgm_rsrc1; /* ordinal19 */
4206 uint32_t compute_pgm_rsrc2; /* ordinal20 */
4207 uint32_t compute_vmid; /* ordinal21 */
4208 uint32_t compute_resource_limits; /* ordinal22 */
4209 uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */
4210 uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */
4211 uint32_t compute_tmpring_size; /* ordinal25 */
4212 uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */
4213 uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */
4214 uint32_t compute_restart_x; /* ordinal28 */
4215 uint32_t compute_restart_y; /* ordinal29 */
4216 uint32_t compute_restart_z; /* ordinal30 */
4217 uint32_t compute_thread_trace_enable; /* ordinal31 */
4218 uint32_t compute_misc_reserved; /* ordinal32 */
4219 uint32_t compute_dispatch_id; /* ordinal33 */
4220 uint32_t compute_threadgroup_id; /* ordinal34 */
4221 uint32_t compute_relaunch; /* ordinal35 */
4222 uint32_t compute_wave_restore_addr_lo; /* ordinal36 */
4223 uint32_t compute_wave_restore_addr_hi; /* ordinal37 */
4224 uint32_t compute_wave_restore_control; /* ordinal38 */
4225 uint32_t reserved9; /* ordinal39 */
4226 uint32_t reserved10; /* ordinal40 */
4227 uint32_t reserved11; /* ordinal41 */
4228 uint32_t reserved12; /* ordinal42 */
4229 uint32_t reserved13; /* ordinal43 */
4230 uint32_t reserved14; /* ordinal44 */
4231 uint32_t reserved15; /* ordinal45 */
4232 uint32_t reserved16; /* ordinal46 */
4233 uint32_t reserved17; /* ordinal47 */
4234 uint32_t reserved18; /* ordinal48 */
4235 uint32_t reserved19; /* ordinal49 */
4236 uint32_t reserved20; /* ordinal50 */
4237 uint32_t reserved21; /* ordinal51 */
4238 uint32_t reserved22; /* ordinal52 */
4239 uint32_t reserved23; /* ordinal53 */
4240 uint32_t reserved24; /* ordinal54 */
4241 uint32_t reserved25; /* ordinal55 */
4242 uint32_t reserved26; /* ordinal56 */
4243 uint32_t reserved27; /* ordinal57 */
4244 uint32_t reserved28; /* ordinal58 */
4245 uint32_t reserved29; /* ordinal59 */
4246 uint32_t reserved30; /* ordinal60 */
4247 uint32_t reserved31; /* ordinal61 */
4248 uint32_t reserved32; /* ordinal62 */
4249 uint32_t reserved33; /* ordinal63 */
4250 uint32_t reserved34; /* ordinal64 */
4251 uint32_t compute_user_data_0; /* ordinal65 */
4252 uint32_t compute_user_data_1; /* ordinal66 */
4253 uint32_t compute_user_data_2; /* ordinal67 */
4254 uint32_t compute_user_data_3; /* ordinal68 */
4255 uint32_t compute_user_data_4; /* ordinal69 */
4256 uint32_t compute_user_data_5; /* ordinal70 */
4257 uint32_t compute_user_data_6; /* ordinal71 */
4258 uint32_t compute_user_data_7; /* ordinal72 */
4259 uint32_t compute_user_data_8; /* ordinal73 */
4260 uint32_t compute_user_data_9; /* ordinal74 */
4261 uint32_t compute_user_data_10; /* ordinal75 */
4262 uint32_t compute_user_data_11; /* ordinal76 */
4263 uint32_t compute_user_data_12; /* ordinal77 */
4264 uint32_t compute_user_data_13; /* ordinal78 */
4265 uint32_t compute_user_data_14; /* ordinal79 */
4266 uint32_t compute_user_data_15; /* ordinal80 */
4267 uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */
4268 uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */
4269 uint32_t reserved35; /* ordinal83 */
4270 uint32_t reserved36; /* ordinal84 */
4271 uint32_t reserved37; /* ordinal85 */
4272 uint32_t cp_mqd_query_time_lo; /* ordinal86 */
4273 uint32_t cp_mqd_query_time_hi; /* ordinal87 */
4274 uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */
4275 uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */
4276 uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */
4277 uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */
4278 uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */
4279 uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */
4280 uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */
4281 uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */
4282 uint32_t reserved38; /* ordinal96 */
4283 uint32_t reserved39; /* ordinal97 */
4284 uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */
4285 uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */
4286 uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */
4287 uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */
4288 uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */
4289 uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */
4290 uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */
4291 uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */
4292 uint32_t reserved40; /* ordinal106 */
4293 uint32_t reserved41; /* ordinal107 */
4294 uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */
4295 uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */
4296 uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */
4297 uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */
4298 uint32_t reserved42; /* ordinal112 */
4299 uint32_t reserved43; /* ordinal113 */
4300 uint32_t cp_pq_exe_status_lo; /* ordinal114 */
4301 uint32_t cp_pq_exe_status_hi; /* ordinal115 */
4302 uint32_t cp_packet_id_lo; /* ordinal116 */
4303 uint32_t cp_packet_id_hi; /* ordinal117 */
4304 uint32_t cp_packet_exe_status_lo; /* ordinal118 */
4305 uint32_t cp_packet_exe_status_hi; /* ordinal119 */
4306 uint32_t gds_save_base_addr_lo; /* ordinal120 */
4307 uint32_t gds_save_base_addr_hi; /* ordinal121 */
4308 uint32_t gds_save_mask_lo; /* ordinal122 */
4309 uint32_t gds_save_mask_hi; /* ordinal123 */
4310 uint32_t ctx_save_base_addr_lo; /* ordinal124 */
4311 uint32_t ctx_save_base_addr_hi; /* ordinal125 */
4312 uint32_t reserved44; /* ordinal126 */
4313 uint32_t reserved45; /* ordinal127 */
4314 uint32_t cp_mqd_base_addr_lo; /* ordinal128 */
4315 uint32_t cp_mqd_base_addr_hi; /* ordinal129 */
4316 uint32_t cp_hqd_active; /* ordinal130 */
4317 uint32_t cp_hqd_vmid; /* ordinal131 */
4318 uint32_t cp_hqd_persistent_state; /* ordinal132 */
4319 uint32_t cp_hqd_pipe_priority; /* ordinal133 */
4320 uint32_t cp_hqd_queue_priority; /* ordinal134 */
4321 uint32_t cp_hqd_quantum; /* ordinal135 */
4322 uint32_t cp_hqd_pq_base_lo; /* ordinal136 */
4323 uint32_t cp_hqd_pq_base_hi; /* ordinal137 */
4324 uint32_t cp_hqd_pq_rptr; /* ordinal138 */
4325 uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */
4326 uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */
4327 uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */
4328 uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */
4329 uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */
4330 uint32_t cp_hqd_pq_wptr; /* ordinal144 */
4331 uint32_t cp_hqd_pq_control; /* ordinal145 */
4332 uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */
4333 uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */
4334 uint32_t cp_hqd_ib_rptr; /* ordinal148 */
4335 uint32_t cp_hqd_ib_control; /* ordinal149 */
4336 uint32_t cp_hqd_iq_timer; /* ordinal150 */
4337 uint32_t cp_hqd_iq_rptr; /* ordinal151 */
4338 uint32_t cp_hqd_dequeue_request; /* ordinal152 */
4339 uint32_t cp_hqd_dma_offload; /* ordinal153 */
4340 uint32_t cp_hqd_sema_cmd; /* ordinal154 */
4341 uint32_t cp_hqd_msg_type; /* ordinal155 */
4342 uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */
4343 uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */
4344 uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */
4345 uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */
4346 uint32_t cp_hqd_hq_status0; /* ordinal160 */
4347 uint32_t cp_hqd_hq_control0; /* ordinal161 */
4348 uint32_t cp_mqd_control; /* ordinal162 */
4349 uint32_t cp_hqd_hq_status1; /* ordinal163 */
4350 uint32_t cp_hqd_hq_control1; /* ordinal164 */
4351 uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */
4352 uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */
4353 uint32_t cp_hqd_eop_control; /* ordinal167 */
4354 uint32_t cp_hqd_eop_rptr; /* ordinal168 */
4355 uint32_t cp_hqd_eop_wptr; /* ordinal169 */
4356 uint32_t cp_hqd_eop_done_events; /* ordinal170 */
4357 uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */
4358 uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */
4359 uint32_t cp_hqd_ctx_save_control; /* ordinal173 */
4360 uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */
4361 uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */
4362 uint32_t cp_hqd_wg_state_offset; /* ordinal176 */
4363 uint32_t cp_hqd_ctx_save_size; /* ordinal177 */
4364 uint32_t cp_hqd_gds_resource_state; /* ordinal178 */
4365 uint32_t cp_hqd_error; /* ordinal179 */
4366 uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */
4367 uint32_t cp_hqd_eop_dones; /* ordinal181 */
4368 uint32_t reserved46; /* ordinal182 */
4369 uint32_t reserved47; /* ordinal183 */
4370 uint32_t reserved48; /* ordinal184 */
4371 uint32_t reserved49; /* ordinal185 */
4372 uint32_t reserved50; /* ordinal186 */
4373 uint32_t reserved51; /* ordinal187 */
4374 uint32_t reserved52; /* ordinal188 */
4375 uint32_t reserved53; /* ordinal189 */
4376 uint32_t reserved54; /* ordinal190 */
4377 uint32_t reserved55; /* ordinal191 */
4378 uint32_t iqtimer_pkt_header; /* ordinal192 */
4379 uint32_t iqtimer_pkt_dw0; /* ordinal193 */
4380 uint32_t iqtimer_pkt_dw1; /* ordinal194 */
4381 uint32_t iqtimer_pkt_dw2; /* ordinal195 */
4382 uint32_t iqtimer_pkt_dw3; /* ordinal196 */
4383 uint32_t iqtimer_pkt_dw4; /* ordinal197 */
4384 uint32_t iqtimer_pkt_dw5; /* ordinal198 */
4385 uint32_t iqtimer_pkt_dw6; /* ordinal199 */
4386 uint32_t iqtimer_pkt_dw7; /* ordinal200 */
4387 uint32_t iqtimer_pkt_dw8; /* ordinal201 */
4388 uint32_t iqtimer_pkt_dw9; /* ordinal202 */
4389 uint32_t iqtimer_pkt_dw10; /* ordinal203 */
4390 uint32_t iqtimer_pkt_dw11; /* ordinal204 */
4391 uint32_t iqtimer_pkt_dw12; /* ordinal205 */
4392 uint32_t iqtimer_pkt_dw13; /* ordinal206 */
4393 uint32_t iqtimer_pkt_dw14; /* ordinal207 */
4394 uint32_t iqtimer_pkt_dw15; /* ordinal208 */
4395 uint32_t iqtimer_pkt_dw16; /* ordinal209 */
4396 uint32_t iqtimer_pkt_dw17; /* ordinal210 */
4397 uint32_t iqtimer_pkt_dw18; /* ordinal211 */
4398 uint32_t iqtimer_pkt_dw19; /* ordinal212 */
4399 uint32_t iqtimer_pkt_dw20; /* ordinal213 */
4400 uint32_t iqtimer_pkt_dw21; /* ordinal214 */
4401 uint32_t iqtimer_pkt_dw22; /* ordinal215 */
4402 uint32_t iqtimer_pkt_dw23; /* ordinal216 */
4403 uint32_t iqtimer_pkt_dw24; /* ordinal217 */
4404 uint32_t iqtimer_pkt_dw25; /* ordinal218 */
4405 uint32_t iqtimer_pkt_dw26; /* ordinal219 */
4406 uint32_t iqtimer_pkt_dw27; /* ordinal220 */
4407 uint32_t iqtimer_pkt_dw28; /* ordinal221 */
4408 uint32_t iqtimer_pkt_dw29; /* ordinal222 */
4409 uint32_t iqtimer_pkt_dw30; /* ordinal223 */
4410 uint32_t iqtimer_pkt_dw31; /* ordinal224 */
4411 uint32_t reserved56; /* ordinal225 */
4412 uint32_t reserved57; /* ordinal226 */
4413 uint32_t reserved58; /* ordinal227 */
4414 uint32_t set_resources_header; /* ordinal228 */
4415 uint32_t set_resources_dw1; /* ordinal229 */
4416 uint32_t set_resources_dw2; /* ordinal230 */
4417 uint32_t set_resources_dw3; /* ordinal231 */
4418 uint32_t set_resources_dw4; /* ordinal232 */
4419 uint32_t set_resources_dw5; /* ordinal233 */
4420 uint32_t set_resources_dw6; /* ordinal234 */
4421 uint32_t set_resources_dw7; /* ordinal235 */
4422 uint32_t reserved59; /* ordinal236 */
4423 uint32_t reserved60; /* ordinal237 */
4424 uint32_t reserved61; /* ordinal238 */
4425 uint32_t reserved62; /* ordinal239 */
4426 uint32_t reserved63; /* ordinal240 */
4427 uint32_t reserved64; /* ordinal241 */
4428 uint32_t reserved65; /* ordinal242 */
4429 uint32_t reserved66; /* ordinal243 */
4430 uint32_t reserved67; /* ordinal244 */
4431 uint32_t reserved68; /* ordinal245 */
4432 uint32_t reserved69; /* ordinal246 */
4433 uint32_t reserved70; /* ordinal247 */
4434 uint32_t reserved71; /* ordinal248 */
4435 uint32_t reserved72; /* ordinal249 */
4436 uint32_t reserved73; /* ordinal250 */
4437 uint32_t reserved74; /* ordinal251 */
4438 uint32_t reserved75; /* ordinal252 */
4439 uint32_t reserved76; /* ordinal253 */
4440 uint32_t reserved77; /* ordinal254 */
4441 uint32_t reserved78; /* ordinal255 */
4442
4443 uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4444};
4445
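/* Tear down the compute MQDs: unpin and free the per-ring MQD buffer
 * objects that gfx_v8_0_cp_compute_resume() allocated.
 */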
4446static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4447{
4448 int i, r;
4449
4450 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4451 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4452
4453 if (ring->mqd_obj) {
4454 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4455 if (unlikely(r != 0))
4456 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4457
4458 amdgpu_bo_unpin(ring->mqd_obj);
4459 amdgpu_bo_unreserve(ring->mqd_obj);
4460
4461 amdgpu_bo_unref(&ring->mqd_obj);
4462 ring->mqd_obj = NULL;
4463 }
4464 }
4465}
4466
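/* Bring up the compute queues: program the per-pipe EOP buffers, then for
 * each compute ring create and map its MQD, fill in the MQD fields while
 * mirroring them into the CP_HQD_* registers, and finally activate the
 * queue and ring-test it.
 */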
4467static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4468{
4469 int r, i, j;
4470 u32 tmp;
4471 bool use_doorbell = true;
4472 u64 hqd_gpu_addr;
4473 u64 mqd_gpu_addr;
4474 u64 eop_gpu_addr;
4475 u64 wb_gpu_addr;
4476 u32 *buf;
4477 struct vi_mqd *mqd;
4478
4479 /* init the pipes */
4480 mutex_lock(&adev->srbm_mutex);
4481 for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4482 int me = (i < 4) ? 1 : 2;
4483 int pipe = (i < 4) ? i : (i - 4);
4484
4485 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4486 eop_gpu_addr >>= 8;
4487
4488 vi_srbm_select(adev, me, pipe, 0, 0);
4489
4490 /* write the EOP addr */
4491 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4492 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4493
4494 /* set the VMID assigned */
4495 WREG32(mmCP_HQD_VMID, 0);
4496
4497 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4498 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4499 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4500 (order_base_2(MEC_HPD_SIZE / 4) - 1));
4501 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4502 }
4503 vi_srbm_select(adev, 0, 0, 0, 0);
4504 mutex_unlock(&adev->srbm_mutex);
4505
4506 /* init the queues. Just two for now. */
4507 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4508 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4509
4510 if (ring->mqd_obj == NULL) {
4511 r = amdgpu_bo_create(adev,
4512 sizeof(struct vi_mqd),
4513 PAGE_SIZE, true,
4514 AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4515 					     NULL, &ring->mqd_obj);
4516 if (r) {
4517 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4518 return r;
4519 }
4520 }
4521
4522 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4523 if (unlikely(r != 0)) {
4524 gfx_v8_0_cp_compute_fini(adev);
4525 return r;
4526 }
4527 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4528 &mqd_gpu_addr);
4529 if (r) {
4530 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4531 gfx_v8_0_cp_compute_fini(adev);
4532 return r;
4533 }
4534 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4535 if (r) {
4536 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4537 gfx_v8_0_cp_compute_fini(adev);
4538 return r;
4539 }
4540
4541 /* init the mqd struct */
4542 memset(buf, 0, sizeof(struct vi_mqd));
4543
4544 mqd = (struct vi_mqd *)buf;
4545 mqd->header = 0xC0310800;
4546 mqd->compute_pipelinestat_enable = 0x00000001;
4547 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4548 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4549 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4550 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4551 mqd->compute_misc_reserved = 0x00000003;
4552
4553 mutex_lock(&adev->srbm_mutex);
4554 vi_srbm_select(adev, ring->me,
4555 ring->pipe,
4556 ring->queue, 0);
4557
4558 /* disable wptr polling */
4559 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4560 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4561 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4562
4563 mqd->cp_hqd_eop_base_addr_lo =
4564 RREG32(mmCP_HQD_EOP_BASE_ADDR);
4565 mqd->cp_hqd_eop_base_addr_hi =
4566 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4567
4568 /* enable doorbell? */
4569 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4570 if (use_doorbell) {
4571 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4572 } else {
4573 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4574 }
4575 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4576 mqd->cp_hqd_pq_doorbell_control = tmp;
4577
4578 /* disable the queue if it's active */
4579 mqd->cp_hqd_dequeue_request = 0;
4580 mqd->cp_hqd_pq_rptr = 0;
4581 		mqd->cp_hqd_pq_wptr = 0;
4582 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4583 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4584 for (j = 0; j < adev->usec_timeout; j++) {
4585 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4586 break;
4587 udelay(1);
4588 }
4589 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4590 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4591 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4592 }
4593
4594 /* set the pointer to the MQD */
4595 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4596 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4597 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4598 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4599
4600 /* set MQD vmid to 0 */
4601 tmp = RREG32(mmCP_MQD_CONTROL);
4602 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4603 WREG32(mmCP_MQD_CONTROL, tmp);
4604 mqd->cp_mqd_control = tmp;
4605
4606 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4607 hqd_gpu_addr = ring->gpu_addr >> 8;
4608 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4609 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4610 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4611 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4612
4613 /* set up the HQD, this is similar to CP_RB0_CNTL */
4614 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4615 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4616 (order_base_2(ring->ring_size / 4) - 1));
4617 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4618 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4619#ifdef __BIG_ENDIAN
4620 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4621#endif
4622 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4623 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4624 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4625 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4626 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4627 mqd->cp_hqd_pq_control = tmp;
4628
4629 		/* set the wb address whether it's enabled or not */
4630 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4631 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4632 mqd->cp_hqd_pq_rptr_report_addr_hi =
4633 upper_32_bits(wb_gpu_addr) & 0xffff;
4634 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4635 mqd->cp_hqd_pq_rptr_report_addr_lo);
4636 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4637 mqd->cp_hqd_pq_rptr_report_addr_hi);
4638
4639 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4640 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4641 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4642 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4643 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4644 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4645 mqd->cp_hqd_pq_wptr_poll_addr_hi);
4646
4647 /* enable the doorbell if requested */
4648 if (use_doorbell) {
4649 			if ((adev->asic_type == CHIP_CARRIZO) ||
4650 			    (adev->asic_type == CHIP_FIJI) ||
4651 			    (adev->asic_type == CHIP_STONEY) ||
4652 			    (adev->asic_type == CHIP_POLARIS11) ||
4653 			    (adev->asic_type == CHIP_POLARIS10)) {
4654 				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4655 				       AMDGPU_DOORBELL_KIQ << 2);
4656 				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4657 				       AMDGPU_DOORBELL_MEC_RING7 << 2);
4658 }
4659 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4660 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4661 DOORBELL_OFFSET, ring->doorbell_index);
4662 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4663 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4664 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4665 mqd->cp_hqd_pq_doorbell_control = tmp;
4666
4667 } else {
4668 mqd->cp_hqd_pq_doorbell_control = 0;
4669 }
4670 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4671 mqd->cp_hqd_pq_doorbell_control);
4672
4673 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4674 ring->wptr = 0;
4675 mqd->cp_hqd_pq_wptr = ring->wptr;
4676 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4677 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4678
4679 /* set the vmid for the queue */
4680 mqd->cp_hqd_vmid = 0;
4681 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4682
4683 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4684 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4685 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4686 mqd->cp_hqd_persistent_state = tmp;
4687 		if (adev->asic_type == CHIP_STONEY ||
4688 		    adev->asic_type == CHIP_POLARIS11 ||
4689 		    adev->asic_type == CHIP_POLARIS10) {
4690 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4691 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4692 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4693 }
4694
4695 /* activate the queue */
4696 mqd->cp_hqd_active = 1;
4697 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4698
4699 vi_srbm_select(adev, 0, 0, 0, 0);
4700 mutex_unlock(&adev->srbm_mutex);
4701
4702 amdgpu_bo_kunmap(ring->mqd_obj);
4703 amdgpu_bo_unreserve(ring->mqd_obj);
4704 }
4705
4706 if (use_doorbell) {
4707 tmp = RREG32(mmCP_PQ_STATUS);
4708 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4709 WREG32(mmCP_PQ_STATUS, tmp);
4710 }
4711
4712 	gfx_v8_0_cp_compute_enable(adev, true);
4713
4714 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4715 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4716
4717 ring->ready = true;
4718 r = amdgpu_ring_test_ring(ring);
4719 if (r)
4720 ring->ready = false;
4721 }
4722
4723 return 0;
4724}
4725
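/* Resume the whole CP block: load the gfx/compute microcode (directly or
 * through the SMU, depending on the firmware loading path), then resume
 * the gfx and compute rings and re-enable the GUI idle interrupt.
 */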
4726static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4727{
4728 int r;
4729
4730 	if (!(adev->flags & AMD_IS_APU))
4731 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4732
4733 	if (!adev->pp_enabled) {
4734 if (!adev->firmware.smu_load) {
4735 /* legacy firmware loading */
4736 r = gfx_v8_0_cp_gfx_load_microcode(adev);
4737 if (r)
4738 return r;
4739 
4740 r = gfx_v8_0_cp_compute_load_microcode(adev);
4741 if (r)
4742 return r;
4743 } else {
4744 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4745 AMDGPU_UCODE_ID_CP_CE);
4746 if (r)
4747 return -EINVAL;
4748
4749 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4750 AMDGPU_UCODE_ID_CP_PFP);
4751 if (r)
4752 return -EINVAL;
4753
4754 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4755 AMDGPU_UCODE_ID_CP_ME);
4756 if (r)
4757 return -EINVAL;
4758
4759 if (adev->asic_type == CHIP_TOPAZ) {
4760 r = gfx_v8_0_cp_compute_load_microcode(adev);
4761 if (r)
4762 return r;
4763 } else {
4764 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4765 AMDGPU_UCODE_ID_CP_MEC1);
4766 if (r)
4767 return -EINVAL;
4768 }
4769 		}
4770 }
4771
4772 r = gfx_v8_0_cp_gfx_resume(adev);
4773 if (r)
4774 return r;
4775
4776 r = gfx_v8_0_cp_compute_resume(adev);
4777 if (r)
4778 return r;
4779
4780 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4781
4782 return 0;
4783}
4784
4785static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4786{
4787 gfx_v8_0_cp_gfx_enable(adev, enable);
4788 gfx_v8_0_cp_compute_enable(adev, enable);
4789}
4790
4791 static int gfx_v8_0_hw_init(void *handle)
4792 {
4793 	int r;
4794 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4795
4796 gfx_v8_0_init_golden_registers(adev);
4797
4798 gfx_v8_0_gpu_init(adev);
4799
4800 r = gfx_v8_0_rlc_resume(adev);
4801 if (r)
4802 return r;
4803
4804 r = gfx_v8_0_cp_resume(adev);
4805 if (r)
4806 return r;
4807
4808 return r;
4809}
4810
4811 static int gfx_v8_0_hw_fini(void *handle)
4812 {
4813 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4814 
4815 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4816 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4817 gfx_v8_0_cp_enable(adev, false);
4818 gfx_v8_0_rlc_stop(adev);
4819 gfx_v8_0_cp_compute_fini(adev);
4820
4821 amdgpu_set_powergating_state(adev,
4822 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
4823
4824 return 0;
4825}
4826
4827 static int gfx_v8_0_suspend(void *handle)
4828 {
4829 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4830 
4831 return gfx_v8_0_hw_fini(adev);
4832}
4833
4834 static int gfx_v8_0_resume(void *handle)
4835 {
4836 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4837 
4838 return gfx_v8_0_hw_init(adev);
4839}
4840
4841 static bool gfx_v8_0_is_idle(void *handle)
4842 {
4843 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4844 
4845 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4846 return false;
4847 else
4848 return true;
4849}
4850
4851 static int gfx_v8_0_wait_for_idle(void *handle)
4852 {
4853 	unsigned i;
4854 	u32 tmp;
4855 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4856
4857 for (i = 0; i < adev->usec_timeout; i++) {
4858 		/* read GRBM_STATUS */
4859 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4860
4861 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4862 return 0;
4863 udelay(1);
4864 }
4865 return -ETIMEDOUT;
4866}
4867
4868 static int gfx_v8_0_soft_reset(void *handle)
4869 {
4870 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4871 	u32 tmp;
4872 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4873
4874 /* GRBM_STATUS */
4875 tmp = RREG32(mmGRBM_STATUS);
4876 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4877 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4878 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4879 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4880 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4881 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4882 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4883 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4884 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4885 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4886 }
4887
4888 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4889 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4890 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4891 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4892 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4893 }
4894
4895 /* GRBM_STATUS2 */
4896 tmp = RREG32(mmGRBM_STATUS2);
4897 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4898 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4899 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4900
4901 /* SRBM_STATUS */
4902 tmp = RREG32(mmSRBM_STATUS);
4903 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4904 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4905 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4906
4907 if (grbm_soft_reset || srbm_soft_reset) {
4908 /* stop the rlc */
4909 gfx_v8_0_rlc_stop(adev);
4910
4911 /* Disable GFX parsing/prefetching */
4912 gfx_v8_0_cp_gfx_enable(adev, false);
4913
4914 /* Disable MEC parsing/prefetching */
4915 gfx_v8_0_cp_compute_enable(adev, false);
4916
4917 if (grbm_soft_reset || srbm_soft_reset) {
4918 tmp = RREG32(mmGMCON_DEBUG);
4919 tmp = REG_SET_FIELD(tmp,
4920 GMCON_DEBUG, GFX_STALL, 1);
4921 tmp = REG_SET_FIELD(tmp,
4922 GMCON_DEBUG, GFX_CLEAR, 1);
4923 WREG32(mmGMCON_DEBUG, tmp);
4924
4925 udelay(50);
4926 }
4927
4928 if (grbm_soft_reset) {
4929 tmp = RREG32(mmGRBM_SOFT_RESET);
4930 tmp |= grbm_soft_reset;
4931 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4932 WREG32(mmGRBM_SOFT_RESET, tmp);
4933 tmp = RREG32(mmGRBM_SOFT_RESET);
4934
4935 udelay(50);
4936
4937 tmp &= ~grbm_soft_reset;
4938 WREG32(mmGRBM_SOFT_RESET, tmp);
4939 tmp = RREG32(mmGRBM_SOFT_RESET);
4940 }
4941
4942 if (srbm_soft_reset) {
4943 tmp = RREG32(mmSRBM_SOFT_RESET);
4944 tmp |= srbm_soft_reset;
4945 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4946 WREG32(mmSRBM_SOFT_RESET, tmp);
4947 tmp = RREG32(mmSRBM_SOFT_RESET);
4948
4949 udelay(50);
4950
4951 tmp &= ~srbm_soft_reset;
4952 WREG32(mmSRBM_SOFT_RESET, tmp);
4953 tmp = RREG32(mmSRBM_SOFT_RESET);
4954 }
4955
4956 if (grbm_soft_reset || srbm_soft_reset) {
4957 tmp = RREG32(mmGMCON_DEBUG);
4958 tmp = REG_SET_FIELD(tmp,
4959 GMCON_DEBUG, GFX_STALL, 0);
4960 tmp = REG_SET_FIELD(tmp,
4961 GMCON_DEBUG, GFX_CLEAR, 0);
4962 WREG32(mmGMCON_DEBUG, tmp);
4963 }
4964
4965 /* Wait a little for things to settle down */
4966 udelay(50);
4967 }
4968 return 0;
4969}
4970
4971/**
4972 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4973 *
4974 * @adev: amdgpu_device pointer
4975 *
4976 * Fetches a GPU clock counter snapshot.
4977 * Returns the 64 bit clock counter snapshot.
4978 */
4979uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4980{
4981 uint64_t clock;
4982
4983 mutex_lock(&adev->gfx.gpu_clock_mutex);
4984 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4985 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4986 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4987 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4988 return clock;
4989}
4990
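/* Emit WRITE_DATA packets that reprogram the GDS, GWS and OA allocations
 * of the given VMID when switching between jobs.
 */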
4991static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4992 uint32_t vmid,
4993 uint32_t gds_base, uint32_t gds_size,
4994 uint32_t gws_base, uint32_t gws_size,
4995 uint32_t oa_base, uint32_t oa_size)
4996{
4997 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4998 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4999
5000 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5001 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5002
5003 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5004 oa_size = oa_size >> AMDGPU_OA_SHIFT;
5005
5006 /* GDS Base */
5007 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5008 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5009 WRITE_DATA_DST_SEL(0)));
5010 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5011 amdgpu_ring_write(ring, 0);
5012 amdgpu_ring_write(ring, gds_base);
5013
5014 /* GDS Size */
5015 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5016 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5017 WRITE_DATA_DST_SEL(0)));
5018 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5019 amdgpu_ring_write(ring, 0);
5020 amdgpu_ring_write(ring, gds_size);
5021
5022 /* GWS */
5023 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5024 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5025 WRITE_DATA_DST_SEL(0)));
5026 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5027 amdgpu_ring_write(ring, 0);
5028 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5029
5030 /* OA */
5031 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5032 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5033 WRITE_DATA_DST_SEL(0)));
5034 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5035 amdgpu_ring_write(ring, 0);
5036 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5037}
5038
5039 static int gfx_v8_0_early_init(void *handle)
5040 {
5041 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5042
5043 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5044 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5045 gfx_v8_0_set_ring_funcs(adev);
5046 gfx_v8_0_set_irq_funcs(adev);
5047 gfx_v8_0_set_gds_init(adev);
5048 	gfx_v8_0_set_rlc_funcs(adev);
5049
5050 return 0;
5051}
5052
5053static int gfx_v8_0_late_init(void *handle)
5054{
5055 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5056 int r;
5057
5058 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5059 if (r)
5060 return r;
5061
5062 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5063 if (r)
5064 return r;
5065
5066 /* requires IBs so do in late init after IB pool is initialized */
5067 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5068 if (r)
5069 return r;
5070
5071 amdgpu_set_powergating_state(adev,
5072 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5073
5074 return 0;
5075}
5076
5077 static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5078 bool enable)
5079{
5080 uint32_t data, temp;
5081
5082 /* Send msg to SMU via Powerplay */
5083 amdgpu_set_powergating_state(adev,
5084 AMD_IP_BLOCK_TYPE_SMC,
5085 enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5086
5087 if (enable) {
5088 /* Enable static MGPG */
5089 temp = data = RREG32(mmRLC_PG_CNTL);
5090 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5091
5092 if (temp != data)
5093 WREG32(mmRLC_PG_CNTL, data);
5094 } else {
5095 temp = data = RREG32(mmRLC_PG_CNTL);
5096 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5097
5098 if (temp != data)
5099 WREG32(mmRLC_PG_CNTL, data);
5100 }
5101}
5102
5103 static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5104 bool enable)
5105{
5106 uint32_t data, temp;
5107
5108 if (enable) {
5109 /* Enable dynamic MGPG */
5110 temp = data = RREG32(mmRLC_PG_CNTL);
5111 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5112
5113 if (temp != data)
5114 WREG32(mmRLC_PG_CNTL, data);
5115 } else {
5116 temp = data = RREG32(mmRLC_PG_CNTL);
5117 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5118
5119 if (temp != data)
5120 WREG32(mmRLC_PG_CNTL, data);
5121 }
5122}
5123
5124 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5125 bool enable)
5126{
5127 uint32_t data, temp;
5128
5129 if (enable) {
5130 /* Enable quick PG */
5131 temp = data = RREG32(mmRLC_PG_CNTL);
5132 data |= 0x100000;
5133
5134 if (temp != data)
5135 WREG32(mmRLC_PG_CNTL, data);
5136 } else {
5137 temp = data = RREG32(mmRLC_PG_CNTL);
5138 data &= ~0x100000;
5139
5140 if (temp != data)
5141 WREG32(mmRLC_PG_CNTL, data);
5142 }
5143}
5144
5145 static int gfx_v8_0_set_powergating_state(void *handle,
5146 					  enum amd_powergating_state state)
5147 {
5148 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5149
5150 if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5151 return 0;
5152
5153 switch (adev->asic_type) {
5154 case CHIP_POLARIS11:
5155 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
5156 polaris11_enable_gfx_static_mg_power_gating(adev,
5157 						state == AMD_PG_STATE_GATE ? true : false);
5158 else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
5159 polaris11_enable_gfx_dynamic_mg_power_gating(adev,
5160 state == AMD_PG_STATE_GATE ? true : false);
5161 else
5162 			polaris11_enable_gfx_quick_mg_power_gating(adev,
5163 state == AMD_PG_STATE_GATE ? true : false);
5164 break;
5165 default:
5166 break;
5167 }
5168
5169 return 0;
5170}
5171
5172 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5173 				     uint32_t reg_addr, uint32_t cmd)
5174{
5175 uint32_t data;
5176
5177 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5178
5179 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5180 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5181
5182 data = RREG32(mmRLC_SERDES_WR_CTRL);
5183 if (adev->asic_type == CHIP_STONEY)
5184 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5185 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5186 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5187 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5188 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5189 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5190 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5191 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5192 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5193 else
5194 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5195 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5196 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5197 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5198 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5199 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5200 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5201 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5202 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5203 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5204 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5205 	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5206 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5207 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5208 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5209
5210 WREG32(mmRLC_SERDES_WR_CTRL, data);
5211}
5212
5213#define MSG_ENTER_RLC_SAFE_MODE 1
5214#define MSG_EXIT_RLC_SAFE_MODE 0
5215
5216#define RLC_GPR_REG2__REQ_MASK 0x00000001
5217#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5218#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5219
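/* Request RLC safe mode by writing a message to RLC_GPR_REG2, then poll
 * RLC_GPM_STAT and RLC_GPR_REG2 until the RLC acknowledges the request.
 */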
5220static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5221{
5222 u32 data = 0;
5223 unsigned i;
5224
5225 data = RREG32(mmRLC_CNTL);
5226 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5227 return;
5228
5229 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5230 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5231 AMD_PG_SUPPORT_GFX_DMG))) {
5232 data |= RLC_GPR_REG2__REQ_MASK;
5233 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5234 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5235 WREG32(mmRLC_GPR_REG2, data);
5236
5237 for (i = 0; i < adev->usec_timeout; i++) {
5238 if ((RREG32(mmRLC_GPM_STAT) &
5239 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5240 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5241 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5242 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5243 break;
5244 udelay(1);
5245 }
5246
5247 for (i = 0; i < adev->usec_timeout; i++) {
5248 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5249 break;
5250 udelay(1);
5251 }
5252 adev->gfx.rlc.in_safe_mode = true;
5253 }
5254}
5255
5256static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5257{
5258 u32 data;
5259 unsigned i;
5260
5261 data = RREG32(mmRLC_CNTL);
5262 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5263 return;
5264
5265 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5266 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5267 AMD_PG_SUPPORT_GFX_DMG))) {
5268 data |= RLC_GPR_REG2__REQ_MASK;
5269 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5270 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5271 WREG32(mmRLC_GPR_REG2, data);
5272 adev->gfx.rlc.in_safe_mode = false;
5273 }
5274
5275 for (i = 0; i < adev->usec_timeout; i++) {
5276 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5277 break;
5278 udelay(1);
5279 }
5280}
5281
5282static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5283{
5284 u32 data;
5285 unsigned i;
5286
5287 data = RREG32(mmRLC_CNTL);
5288 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5289 return;
5290
5291 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5292 data |= RLC_SAFE_MODE__CMD_MASK;
5293 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5294 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5295 WREG32(mmRLC_SAFE_MODE, data);
5296
5297 for (i = 0; i < adev->usec_timeout; i++) {
5298 if ((RREG32(mmRLC_GPM_STAT) &
5299 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5300 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5301 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5302 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5303 break;
5304 udelay(1);
5305 }
5306
5307 for (i = 0; i < adev->usec_timeout; i++) {
5308 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5309 break;
5310 udelay(1);
5311 }
5312 adev->gfx.rlc.in_safe_mode = true;
5313 }
5314}
5315
5316static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5317{
5318 u32 data = 0;
5319 unsigned i;
5320
5321 data = RREG32(mmRLC_CNTL);
5322 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5323 return;
5324
5325 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5326 if (adev->gfx.rlc.in_safe_mode) {
5327 data |= RLC_SAFE_MODE__CMD_MASK;
5328 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5329 WREG32(mmRLC_SAFE_MODE, data);
5330 adev->gfx.rlc.in_safe_mode = false;
5331 }
5332 }
5333
5334 for (i = 0; i < adev->usec_timeout; i++) {
5335 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5336 break;
5337 udelay(1);
5338 }
5339}
5340
5341static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5342{
5343 adev->gfx.rlc.in_safe_mode = true;
5344}
5345
5346static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5347{
5348 adev->gfx.rlc.in_safe_mode = false;
5349}
5350
5351static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5352 .enter_safe_mode = cz_enter_rlc_safe_mode,
5353 .exit_safe_mode = cz_exit_rlc_safe_mode
5354};
5355
5356static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5357 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5358 .exit_safe_mode = iceland_exit_rlc_safe_mode
5359};
5360
5361static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5362 .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5363 .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5364};
5365
5366static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5367 bool enable)
5368{
5369 uint32_t temp, data;
5370
5371 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5372
5373 	/* It is disabled by HW by default */
5374 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5375 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5376 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5377 /* 1 - RLC memory Light sleep */
5378 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
5379 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5380 if (temp != data)
5381 WREG32(mmRLC_MEM_SLP_CNTL, data);
5382 }
5383 
5384 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5385 /* 2 - CP memory Light sleep */
5386 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
5387 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5388 if (temp != data)
5389 WREG32(mmCP_MEM_SLP_CNTL, data);
5390 }
5391 }
5392
5393 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5394 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5395 if (adev->flags & AMD_IS_APU)
5396 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5397 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5398 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5399 else
5400 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5401 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5402 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5403 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5404
5405 if (temp != data)
5406 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5407
5408 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5409 gfx_v8_0_wait_for_rlc_serdes(adev);
5410
5411 /* 5 - clear mgcg override */
5412 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5413 
5414 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5415 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5416 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5417 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5418 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5419 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5420 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5421 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5422 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5423 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5424 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5425 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5426 if (temp != data)
5427 WREG32(mmCGTS_SM_CTRL_REG, data);
5428 }
5429 udelay(50);
5430
5431 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5432 gfx_v8_0_wait_for_rlc_serdes(adev);
5433 } else {
5434 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5435 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5436 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5437 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5438 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5439 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5440 if (temp != data)
5441 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5442
5443 /* 2 - disable MGLS in RLC */
5444 data = RREG32(mmRLC_MEM_SLP_CNTL);
5445 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5446 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5447 WREG32(mmRLC_MEM_SLP_CNTL, data);
5448 }
5449
5450 /* 3 - disable MGLS in CP */
5451 data = RREG32(mmCP_MEM_SLP_CNTL);
5452 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5453 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5454 WREG32(mmCP_MEM_SLP_CNTL, data);
5455 }
5456
5457 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5458 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5459 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5460 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5461 if (temp != data)
5462 WREG32(mmCGTS_SM_CTRL_REG, data);
5463
5464 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5465 gfx_v8_0_wait_for_rlc_serdes(adev);
5466
5467 /* 6 - set mgcg override */
5468 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5469
5470 udelay(50);
5471
5472 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5473 gfx_v8_0_wait_for_rlc_serdes(adev);
5474 }
5475
5476 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5477}
5478
5479static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5480 bool enable)
5481{
5482 uint32_t temp, temp1, data, data1;
5483
5484 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5485
5486 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5487
5488 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5489 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5490 * Cmp_busy/GFX_Idle interrupts
5491 */
5492 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5493
5494 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5495 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5496 if (temp1 != data1)
5497 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5498
5499 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5500 gfx_v8_0_wait_for_rlc_serdes(adev);
5501
5502 /* 3 - clear cgcg override */
5503 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5504
5505 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5506 gfx_v8_0_wait_for_rlc_serdes(adev);
5507
5508 /* 4 - write cmd to set CGLS */
5509 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5510
5511 /* 5 - enable cgcg */
5512 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5513
5514 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5515 /* enable cgls*/
5516 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5517 
5518 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5519 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5520 
5521 if (temp1 != data1)
5522 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5523 } else {
5524 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5525 }
5526
5527 if (temp != data)
5528 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5529 } else {
5530 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5531 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5532
5533 /* TEST CGCG */
5534 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5535 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5536 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5537 if (temp1 != data1)
5538 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5539
5540 /* read gfx register to wake up cgcg */
5541 RREG32(mmCB_CGTT_SCLK_CTRL);
5542 RREG32(mmCB_CGTT_SCLK_CTRL);
5543 RREG32(mmCB_CGTT_SCLK_CTRL);
5544 RREG32(mmCB_CGTT_SCLK_CTRL);
5545
5546 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5547 gfx_v8_0_wait_for_rlc_serdes(adev);
5548
5549 		/* write cmd to Set CGCG Override */
5550 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5551
5552 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5553 gfx_v8_0_wait_for_rlc_serdes(adev);
5554
5555 /* write cmd to Clear CGLS */
5556 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5557
5558 /* disable cgcg, cgls should be disabled too. */
5559 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5560 			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5561 if (temp != data)
5562 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5563 }
5564
5565 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5566 }
5567static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5568 bool enable)
5569{
5570 if (enable) {
5571 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5572 * === MGCG + MGLS + TS(CG/LS) ===
5573 */
5574 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5575 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5576 } else {
5577 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5578 * === CGCG + CGLS ===
5579 */
5580 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5581 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5582 }
5583 return 0;
5584}
5585
5586 static int gfx_v8_0_set_clockgating_state(void *handle,
5587 					  enum amd_clockgating_state state)
5588 {
5589 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5590
5591 switch (adev->asic_type) {
5592 case CHIP_FIJI:
5593 case CHIP_CARRIZO:
5594 case CHIP_STONEY:
5595 gfx_v8_0_update_gfx_clock_gating(adev,
5596 state == AMD_CG_STATE_GATE ? true : false);
5597 break;
5598 default:
5599 break;
5600 }
5601 return 0;
5602}
5603
5604static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5605{
5606 u32 rptr;
5607
5608 rptr = ring->adev->wb.wb[ring->rptr_offs];
5609
5610 return rptr;
5611}
5612
5613static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5614{
5615 struct amdgpu_device *adev = ring->adev;
5616 u32 wptr;
5617
5618 if (ring->use_doorbell)
5619 /* XXX check if swapping is necessary on BE */
5620 wptr = ring->adev->wb.wb[ring->wptr_offs];
5621 else
5622 wptr = RREG32(mmCP_RB0_WPTR);
5623
5624 return wptr;
5625}
5626
5627static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5628{
5629 struct amdgpu_device *adev = ring->adev;
5630
5631 if (ring->use_doorbell) {
5632 /* XXX check if swapping is necessary on BE */
5633 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5634 WDOORBELL32(ring->doorbell_index, ring->wptr);
5635 } else {
5636 WREG32(mmCP_RB0_WPTR, ring->wptr);
5637 (void)RREG32(mmCP_RB0_WPTR);
5638 }
5639}
5640
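/* Emit a WAIT_REG_MEM packet that requests an HDP flush and waits on the
 * GPU_HDP_FLUSH_DONE bit that corresponds to this ring's CP engine.
 */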
5641 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5642{
5643 u32 ref_and_mask, reg_mem_engine;
5644
5645 if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
5646 switch (ring->me) {
5647 case 1:
5648 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5649 break;
5650 case 2:
5651 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5652 break;
5653 default:
5654 return;
5655 }
5656 reg_mem_engine = 0;
5657 } else {
5658 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5659 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5660 }
5661
5662 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5663 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5664 WAIT_REG_MEM_FUNCTION(3) | /* == */
5665 reg_mem_engine));
5666 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5667 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5668 amdgpu_ring_write(ring, ref_and_mask);
5669 amdgpu_ring_write(ring, ref_and_mask);
5670 amdgpu_ring_write(ring, 0x20); /* poll interval */
5671}
5672
5673static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
5674{
5675 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5676 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5677 WRITE_DATA_DST_SEL(0) |
5678 WR_CONFIRM));
5679 amdgpu_ring_write(ring, mmHDP_DEBUG0);
5680 amdgpu_ring_write(ring, 0);
5681 amdgpu_ring_write(ring, 1);
5682
5683}
5684
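/* Emit an indirect buffer on the gfx ring: update next_rptr through a
 * WRITE_DATA packet, insert a SWITCH_BUFFER on context switches, then
 * issue the INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for CE IBs) packet
 * carrying the IB address, size and VMID.
 */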
5685 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5686 struct amdgpu_ib *ib,
5687 unsigned vm_id, bool ctx_switch)
5688{
5689 u32 header, control = 0;
5690 u32 next_rptr = ring->wptr + 5;
5691 
5692 	if (ctx_switch)
5693 next_rptr += 2;
5694
5695 next_rptr += 4;
5696 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5697 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5698 amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5699 amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5700 amdgpu_ring_write(ring, next_rptr);
5701
5702 	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
5703 	if (ctx_switch) {
5704 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5705 amdgpu_ring_write(ring, 0);
5706 }
5707
5708 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5709 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5710 else
5711 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5712
5713 	control |= ib->length_dw | (vm_id << 24);
5714
5715 amdgpu_ring_write(ring, header);
5716 amdgpu_ring_write(ring,
5717#ifdef __BIG_ENDIAN
5718 (2 << 0) |
5719#endif
5720 (ib->gpu_addr & 0xFFFFFFFC));
5721 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5722 amdgpu_ring_write(ring, control);
5723}
5724
5725 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5726 struct amdgpu_ib *ib,
5727 unsigned vm_id, bool ctx_switch)
5728 {
5729 u32 header, control = 0;
5730 u32 next_rptr = ring->wptr + 5;
5731
5732 control |= INDIRECT_BUFFER_VALID;
5733
5734 next_rptr += 4;
5735 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5736 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5737 amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5738 amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5739 amdgpu_ring_write(ring, next_rptr);
5740
5741 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5742
5743 	control |= ib->length_dw | (vm_id << 24);
5744 
5745 amdgpu_ring_write(ring, header);
5746 amdgpu_ring_write(ring,
5747#ifdef __BIG_ENDIAN
5748 (2 << 0) |
5749#endif
5750 (ib->gpu_addr & 0xFFFFFFFC));
5751 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5752 amdgpu_ring_write(ring, control);
5753}
5754
5755 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
5756 					 u64 seq, unsigned flags)
5757 {
5758 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5759 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5760
5761 /* EVENT_WRITE_EOP - flush caches, send int */
5762 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
5763 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5764 EOP_TC_ACTION_EN |
5765 				 EOP_TC_WB_ACTION_EN |
5766 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5767 EVENT_INDEX(5)));
5768 amdgpu_ring_write(ring, addr & 0xfffffffc);
5769 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
5770 			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5771 amdgpu_ring_write(ring, lower_32_bits(seq));
5772 amdgpu_ring_write(ring, upper_32_bits(seq));
5773 
5774}
5775
5776 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5777{
5778 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5779 	uint32_t seq = ring->fence_drv.sync_seq;
5780 uint64_t addr = ring->fence_drv.gpu_addr;
5781
5782 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5783 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
5784 WAIT_REG_MEM_FUNCTION(3) | /* equal */
5785 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
5786 amdgpu_ring_write(ring, addr & 0xfffffffc);
5787 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
5788 amdgpu_ring_write(ring, seq);
5789 amdgpu_ring_write(ring, 0xffffffff);
5790 amdgpu_ring_write(ring, 4); /* poll interval */
5791 
5792 	if (usepfp) {
5793 		/* sync CE with ME to prevent CE fetching CEIB before the context switch is done */
5794 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5795 amdgpu_ring_write(ring, 0);
5796 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5797 amdgpu_ring_write(ring, 0);
5798 }
5799}
5800
5801static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5802 unsigned vm_id, uint64_t pd_addr)
5803{
5804 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5805 
5806 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5807 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5808 WRITE_DATA_DST_SEL(0)) |
5809 WR_CONFIRM);
5810 if (vm_id < 8) {
5811 amdgpu_ring_write(ring,
5812 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
5813 } else {
5814 amdgpu_ring_write(ring,
5815 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
5816 }
5817 amdgpu_ring_write(ring, 0);
5818 amdgpu_ring_write(ring, pd_addr >> 12);
5819
5820 /* bits 0-15 are the VM contexts0-15 */
5821 /* invalidate the cache */
5822 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5823 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5824 WRITE_DATA_DST_SEL(0)));
5825 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5826 amdgpu_ring_write(ring, 0);
5827 amdgpu_ring_write(ring, 1 << vm_id);
5828
5829 /* wait for the invalidate to complete */
5830 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5831 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5832 WAIT_REG_MEM_FUNCTION(0) | /* always */
5833 WAIT_REG_MEM_ENGINE(0))); /* me */
5834 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5835 amdgpu_ring_write(ring, 0);
5836 amdgpu_ring_write(ring, 0); /* ref */
5837 amdgpu_ring_write(ring, 0); /* mask */
5838 amdgpu_ring_write(ring, 0x20); /* poll interval */
5839
5840 /* compute doesn't have PFP */
5841 if (usepfp) {
5842 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5843 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5844 amdgpu_ring_write(ring, 0x0);
5845 		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5846 amdgpu_ring_write(ring, 0);
5847 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5848 amdgpu_ring_write(ring, 0);
5849 }
5850}
5851
5852static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5853{
5854 return ring->adev->wb.wb[ring->rptr_offs];
5855}
5856
5857static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5858{
5859 return ring->adev->wb.wb[ring->wptr_offs];
5860}
5861
5862static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5863{
5864 struct amdgpu_device *adev = ring->adev;
5865
5866 /* XXX check if swapping is necessary on BE */
5867 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5868 WDOORBELL32(ring->doorbell_index, ring->wptr);
5869}
5870
5871static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
5872 u64 addr, u64 seq,
890ee23f 5873 unsigned flags)
aaa36a97 5874{
890ee23f
CZ
5875 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5876 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5877
aaa36a97
AD
5878 /* RELEASE_MEM - flush caches, send int */
5879 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
5880 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5881 EOP_TC_ACTION_EN |
a3d5aaa8 5882 EOP_TC_WB_ACTION_EN |
aaa36a97
AD
5883 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5884 EVENT_INDEX(5)));
890ee23f 5885 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
aaa36a97
AD
5886 amdgpu_ring_write(ring, addr & 0xfffffffc);
5887 amdgpu_ring_write(ring, upper_32_bits(addr));
5888 amdgpu_ring_write(ring, lower_32_bits(seq));
5889 amdgpu_ring_write(ring, upper_32_bits(seq));
5890}
5891
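/*
 * Enable or disable the end-of-pipe (timestamp) interrupt for the gfx
 * ring by toggling TIME_STAMP_INT_ENABLE in CP_INT_CNTL_RING0.
 */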
5892static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5893 enum amdgpu_interrupt_state state)
5894{
5895 u32 cp_int_cntl;
5896
5897 switch (state) {
5898 case AMDGPU_IRQ_STATE_DISABLE:
5899 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5900 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5901 TIME_STAMP_INT_ENABLE, 0);
5902 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5903 break;
5904 case AMDGPU_IRQ_STATE_ENABLE:
5905 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5906 cp_int_cntl =
5907 REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5908 TIME_STAMP_INT_ENABLE, 1);
5909 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5910 break;
5911 default:
5912 break;
5913 }
5914}
5915
5916static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5917 int me, int pipe,
5918 enum amdgpu_interrupt_state state)
5919{
5920 u32 mec_int_cntl, mec_int_cntl_reg;
5921
5922 /*
5923 * amdgpu controls only pipe 0 of MEC1. That's why this function only
5924 * handles the setting of interrupts for this specific pipe. All other
5925 * pipes' interrupts are set by amdkfd.
5926 */
5927
5928 if (me == 1) {
5929 switch (pipe) {
5930 case 0:
5931 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
5932 break;
5933 default:
5934 DRM_DEBUG("invalid pipe %d\n", pipe);
5935 return;
5936 }
5937 } else {
5938 DRM_DEBUG("invalid me %d\n", me);
5939 return;
5940 }
5941
5942 switch (state) {
5943 case AMDGPU_IRQ_STATE_DISABLE:
5944 mec_int_cntl = RREG32(mec_int_cntl_reg);
5945 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5946 TIME_STAMP_INT_ENABLE, 0);
5947 WREG32(mec_int_cntl_reg, mec_int_cntl);
5948 break;
5949 case AMDGPU_IRQ_STATE_ENABLE:
5950 mec_int_cntl = RREG32(mec_int_cntl_reg);
5951 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5952 TIME_STAMP_INT_ENABLE, 1);
5953 WREG32(mec_int_cntl_reg, mec_int_cntl);
5954 break;
5955 default:
5956 break;
5957 }
5958}
5959
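/*
 * Privileged register faults are also controlled through
 * CP_INT_CNTL_RING0; toggle PRIV_REG_INT_ENABLE to match the requested
 * state.
 */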
5960static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5961 struct amdgpu_irq_src *source,
5962 unsigned type,
5963 enum amdgpu_interrupt_state state)
5964{
5965 u32 cp_int_cntl;
5966
5967 switch (state) {
5968 case AMDGPU_IRQ_STATE_DISABLE:
5969 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5970 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5971 PRIV_REG_INT_ENABLE, 0);
5972 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5973 break;
5974 case AMDGPU_IRQ_STATE_ENABLE:
5975 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5976 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
04ab3b76 5977 PRIV_REG_INT_ENABLE, 1);
aaa36a97
AD
5978 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5979 break;
5980 default:
5981 break;
5982 }
5983
5984 return 0;
5985}
5986
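/* Same as above, but for the privileged instruction fault interrupt. */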
5987static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5988 struct amdgpu_irq_src *source,
5989 unsigned type,
5990 enum amdgpu_interrupt_state state)
5991{
5992 u32 cp_int_cntl;
5993
5994 switch (state) {
5995 case AMDGPU_IRQ_STATE_DISABLE:
5996 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5997 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5998 PRIV_INSTR_INT_ENABLE, 0);
5999 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6000 break;
6001 case AMDGPU_IRQ_STATE_ENABLE:
6002 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6003 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6004 PRIV_INSTR_INT_ENABLE, 1);
6005 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6006 break;
6007 default:
6008 break;
6009 }
6010
6011 return 0;
6012}
6013
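/*
 * Route an EOP interrupt enable/disable request either to the gfx ring
 * or to the matching MEC pipe, based on the IRQ type.
 */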
6014static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6015 struct amdgpu_irq_src *src,
6016 unsigned type,
6017 enum amdgpu_interrupt_state state)
6018{
6019 switch (type) {
6020 case AMDGPU_CP_IRQ_GFX_EOP:
6021 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6022 break;
6023 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6024 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6025 break;
6026 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6027 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6028 break;
6029 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6030 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6031 break;
6032 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6033 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6034 break;
6035 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6036 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6037 break;
6038 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6039 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6040 break;
6041 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6042 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6043 break;
6044 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6045 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6046 break;
6047 default:
6048 break;
6049 }
6050 return 0;
6051}
6052
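/*
 * EOP interrupt handler.  The IV ring_id encodes the source: pipe in
 * bits 1:0, ME in bits 3:2 and queue in bits 6:4.  ME 0 is the gfx
 * ring, ME 1 and 2 are the compute MECs.
 */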
6053static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6054 struct amdgpu_irq_src *source,
6055 struct amdgpu_iv_entry *entry)
6056{
6057 int i;
6058 u8 me_id, pipe_id, queue_id;
6059 struct amdgpu_ring *ring;
6060
6061 DRM_DEBUG("IH: CP EOP\n");
6062 me_id = (entry->ring_id & 0x0c) >> 2;
6063 pipe_id = (entry->ring_id & 0x03) >> 0;
6064 queue_id = (entry->ring_id & 0x70) >> 4;
6065
6066 switch (me_id) {
6067 case 0:
6068 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6069 break;
6070 case 1:
6071 case 2:
6072 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6073 ring = &adev->gfx.compute_ring[i];
6074			/* Per-queue interrupts are supported for the MEC starting with VI,
6075			 * but they can only be enabled/disabled per pipe rather than per queue.
6076 */
6077 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6078 amdgpu_fence_process(ring);
6079 }
6080 break;
6081 }
6082 return 0;
6083}
6084
6085static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6086 struct amdgpu_irq_src *source,
6087 struct amdgpu_iv_entry *entry)
6088{
6089 DRM_ERROR("Illegal register access in command stream\n");
6090 schedule_work(&adev->reset_work);
6091 return 0;
6092}
6093
6094static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6095 struct amdgpu_irq_src *source,
6096 struct amdgpu_iv_entry *entry)
6097{
6098 DRM_ERROR("Illegal instruction in command stream\n");
6099 schedule_work(&adev->reset_work);
6100 return 0;
6101}
6102
5fc3aeeb 6103const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
88a907d6 6104 .name = "gfx_v8_0",
aaa36a97 6105 .early_init = gfx_v8_0_early_init,
ccba7691 6106 .late_init = gfx_v8_0_late_init,
aaa36a97
AD
6107 .sw_init = gfx_v8_0_sw_init,
6108 .sw_fini = gfx_v8_0_sw_fini,
6109 .hw_init = gfx_v8_0_hw_init,
6110 .hw_fini = gfx_v8_0_hw_fini,
6111 .suspend = gfx_v8_0_suspend,
6112 .resume = gfx_v8_0_resume,
6113 .is_idle = gfx_v8_0_is_idle,
6114 .wait_for_idle = gfx_v8_0_wait_for_idle,
6115 .soft_reset = gfx_v8_0_soft_reset,
aaa36a97
AD
6116 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6117 .set_powergating_state = gfx_v8_0_set_powergating_state,
6118};
6119
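/*
 * Ring callback tables.  The gfx and compute variants share the VM
 * flush, GDS switch, HDP and pipeline-sync helpers and differ only in
 * how the ring pointers are accessed and how IBs and fences are
 * emitted.
 */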
6120static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6121 .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
6122 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6123 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6124 .parse_cs = NULL,
93323131 6125 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
aaa36a97 6126 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
b8c7b39e 6127 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
aaa36a97
AD
6128 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6129 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
d2edb07b 6130 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
d35db561 6131 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
aaa36a97
AD
6132 .test_ring = gfx_v8_0_ring_test_ring,
6133 .test_ib = gfx_v8_0_ring_test_ib,
edff0e28 6134 .insert_nop = amdgpu_ring_insert_nop,
9e5d5309 6135 .pad_ib = amdgpu_ring_generic_pad_ib,
aaa36a97
AD
6136};
6137
6138static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6139 .get_rptr = gfx_v8_0_ring_get_rptr_compute,
6140 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6141 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6142 .parse_cs = NULL,
93323131 6143 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
aaa36a97 6144 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
b8c7b39e 6145 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
aaa36a97
AD
6146 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6147 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
35074d2d 6148 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
d35db561 6149 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
aaa36a97
AD
6150 .test_ring = gfx_v8_0_ring_test_ring,
6151 .test_ib = gfx_v8_0_ring_test_ib,
edff0e28 6152 .insert_nop = amdgpu_ring_insert_nop,
9e5d5309 6153 .pad_ib = amdgpu_ring_generic_pad_ib,
aaa36a97
AD
6154};
6155
6156static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6157{
6158 int i;
6159
6160 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6161 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6162
6163 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6164 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6165}
6166
6167static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6168 .set = gfx_v8_0_set_eop_interrupt_state,
6169 .process = gfx_v8_0_eop_irq,
6170};
6171
6172static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6173 .set = gfx_v8_0_set_priv_reg_fault_state,
6174 .process = gfx_v8_0_priv_reg_irq,
6175};
6176
6177static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6178 .set = gfx_v8_0_set_priv_inst_fault_state,
6179 .process = gfx_v8_0_priv_inst_irq,
6180};
6181
6182static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6183{
6184 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6185 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6186
6187 adev->gfx.priv_reg_irq.num_types = 1;
6188 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6189
6190 adev->gfx.priv_inst_irq.num_types = 1;
6191 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6192}
6193
dbff57bc
AD
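/*
 * Select the ASIC specific RLC callbacks; ASICs without a dedicated
 * implementation fall back to the no-op table.
 */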
6194static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6195{
6196 switch (adev->asic_type) {
6197 case CHIP_TOPAZ:
6198 case CHIP_STONEY:
6199 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6200 break;
6201 case CHIP_CARRIZO:
6202 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6203 break;
6204 default:
6205 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6206 break;
6207 }
6208}
6209
aaa36a97
AD
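/*
 * Partition the GDS memory, GWS and OA resources between the gfx and
 * compute (CS) clients based on the total GDS size reported by the
 * hardware.
 */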
6210static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6211{
6212	/* init asic gds info */
6213 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6214 adev->gds.gws.total_size = 64;
6215 adev->gds.oa.total_size = 16;
6216
6217 if (adev->gds.mem.total_size == 64 * 1024) {
6218 adev->gds.mem.gfx_partition_size = 4096;
6219 adev->gds.mem.cs_partition_size = 4096;
6220
6221 adev->gds.gws.gfx_partition_size = 4;
6222 adev->gds.gws.cs_partition_size = 4;
6223
6224 adev->gds.oa.gfx_partition_size = 4;
6225 adev->gds.oa.cs_partition_size = 1;
6226 } else {
6227 adev->gds.mem.gfx_partition_size = 1024;
6228 adev->gds.mem.cs_partition_size = 1024;
6229
6230 adev->gds.gws.gfx_partition_size = 16;
6231 adev->gds.gws.cs_partition_size = 16;
6232
6233 adev->gds.oa.gfx_partition_size = 4;
6234 adev->gds.oa.cs_partition_size = 4;
6235 }
6236}
6237
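/*
 * Return the bitmap of active CUs for the currently selected SE/SH by
 * masking out the CUs that CC/GC_USER_SHADER_ARRAY_CONFIG report as
 * inactive.
 */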
8f8e00c1 6238static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
aaa36a97 6239{
8f8e00c1 6240 u32 data, mask;
aaa36a97 6241
8f8e00c1
AD
6242 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6243 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
aaa36a97 6244
8f8e00c1
AD
6245 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6246 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
aaa36a97 6247
6157bd7a 6248 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
aaa36a97 6249
8f8e00c1 6250 return (~data) & mask;
aaa36a97
AD
6251}
6252
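/*
 * Walk every SE/SH pair, record its active CU bitmap and count the
 * total number of active CUs.  The first two active CUs of each SH are
 * also collected into the "always on" CU mask.
 */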
7dae69a2 6253static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
aaa36a97
AD
6254{
6255 int i, j, k, counter, active_cu_number = 0;
6256 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7dae69a2 6257 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
aaa36a97 6258
6157bd7a
FC
6259 memset(cu_info, 0, sizeof(*cu_info));
6260
aaa36a97
AD
6261 mutex_lock(&adev->grbm_idx_mutex);
6262 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6263 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6264 mask = 1;
6265 ao_bitmap = 0;
6266 counter = 0;
8f8e00c1
AD
6267 gfx_v8_0_select_se_sh(adev, i, j);
6268 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
aaa36a97
AD
6269 cu_info->bitmap[i][j] = bitmap;
6270
8f8e00c1 6271			for (k = 0; k < 16; k++) {
aaa36a97
AD
6272 if (bitmap & mask) {
6273 if (counter < 2)
6274 ao_bitmap |= mask;
6275					counter++;
6276 }
6277 mask <<= 1;
6278 }
6279 active_cu_number += counter;
6280 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6281 }
6282 }
8f8e00c1
AD
6283 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
6284 mutex_unlock(&adev->grbm_idx_mutex);
aaa36a97
AD
6285
6286 cu_info->number = active_cu_number;
6287 cu_info->ao_cu_mask = ao_cu_mask;
aaa36a97 6288}