/* [linux-2.6-block.git] drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c */
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

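/*
 * "Golden" register tables.  Each SOC15_REG_GOLDEN_VALUE() entry below appears
 * to encode a GC-block register, a mask of the bits to update and the value to
 * program into them; the tables are applied at init time through
 * soc15_program_register_sequence() (see that helper for the exact semantics).
 */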
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

946a4d5b 131static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
b1023571 132{
133 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
134 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
135 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
136 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
137 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
138 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
139 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
e6d57520 140 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
946a4d5b 141 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
142 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
143 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
144 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
145 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
146 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
946a4d5b 147 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
148 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
149 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
919a94d8 150 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
151};
152
153static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
154{
ac26b0f3 155 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
156 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
157 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
158 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
159 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
160 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
161 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
162 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
163 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
164 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
165 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
166};
167
168static const struct soc15_reg_golden golden_settings_gc_9_1[] =
169{
170 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
171 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
172 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
173 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
174 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
175 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
176 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
177 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
178 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
179 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
180 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
181 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
182 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
183 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
184 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
185 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
186 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
187 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
188 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
189 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
190 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
191 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
192 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
193 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
194};
195
946a4d5b 196static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
a5fdb336 197{
198 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
199 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
200 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
201 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
202 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
203 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
204 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
205};
206
207static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
208{
209 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
210 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
211 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
212 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
213 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
214 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
215 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
216 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
217 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
218 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
219 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
220 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
221 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
222 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
223 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
224 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
225 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
226 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
227 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
228};
229
946a4d5b 230static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
f5eaffcc 231{
f7b1844b 232 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
233 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
234 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
235};
236
237static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
238{
239 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
240 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
241 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
242 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
243 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
244 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
245 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
246 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
247 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
248 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
249 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
250 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
251 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
252 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
253 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
254 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
255};
256
257static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
258{
259 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
260 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
261 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
262 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
263 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
264 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
265 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
266 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
267 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
268 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
269 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
270 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
271 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
272};
273
274static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
275{
276 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
277 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
278 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
279 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
280 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
281 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
282 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
283 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
284};
285
286static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
287{
288 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
289 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
290 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
291 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
292 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
293 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
294 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
295 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
296};
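/*
 * The two tables above appear to hold the zero-based offsets of the eight
 * RLC_SRM_INDEX_CNTL_ADDR_n / RLC_SRM_INDEX_CNTL_DATA_n registers relative to
 * instance 0, so callers can program entry i as "base register + offset[i]".
 */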

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);

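/*
 * Presumably applies the per-ASIC golden register tables above: the common
 * GC 9.x sequence plus the chip-specific one selected by asic_type/rev_id.
 */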
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if (!amdgpu_virt_support_skip_setting(adev)) {
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0,
							ARRAY_SIZE(golden_settings_gc_9_0));
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0_vg10,
							ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		}
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	default:
		break;
	}

	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

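/*
 * Small helpers that emit PM4 packets on a ring: WRITE_DATA stores a value to
 * a register (optionally with write confirmation), and WAIT_REG_MEM makes the
 * CP poll a register or memory location until the masked value equals the
 * reference (the function field is hard-coded to "equal" here).
 */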
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

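/*
 * Basic ring sanity test: seed a scratch register with 0xCAFEDEAD, submit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, then poll until the new
 * value shows up or adev->usec_timeout expires.
 */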
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

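/*
 * Indirect-buffer (IB) test: build a small IB that WRITE_DATAs 0xDEADBEEF into
 * a writeback slot, schedule it with a fence, wait for the fence with the
 * given timeout, then check that memory was updated.
 */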
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
514
515static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
516{
517 const struct rlc_firmware_header_v2_1 *rlc_hdr;
518
519 rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
520 adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
521 adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
522 adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
523 adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
524 adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
525 adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
526 adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
527 adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
528 adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
529 adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
530 adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
531 adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
532 adev->gfx.rlc.reg_list_format_direct_reg_list_length =
533 le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
534}
535
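/*
 * The CP firmware only honors the register write-wait path from certain ucode
 * versions on; this appears to record, per ASIC, whether the loaded
 * ME/PFP/MEC firmware is new enough to use it.
 */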
536static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
537{
538 adev->gfx.me_fw_write_wait = false;
539 adev->gfx.mec_fw_write_wait = false;
540
541 switch (adev->asic_type) {
542 case CHIP_VEGA10:
543 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
544 (adev->gfx.me_feature_version >= 42) &&
545 (adev->gfx.pfp_fw_version >= 0x000000b1) &&
546 (adev->gfx.pfp_feature_version >= 42))
547 adev->gfx.me_fw_write_wait = true;
548
549 if ((adev->gfx.mec_fw_version >= 0x00000193) &&
550 (adev->gfx.mec_feature_version >= 42))
551 adev->gfx.mec_fw_write_wait = true;
552 break;
553 case CHIP_VEGA12:
554 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
555 (adev->gfx.me_feature_version >= 44) &&
556 (adev->gfx.pfp_fw_version >= 0x000000b2) &&
557 (adev->gfx.pfp_feature_version >= 44))
558 adev->gfx.me_fw_write_wait = true;
559
560 if ((adev->gfx.mec_fw_version >= 0x00000196) &&
561 (adev->gfx.mec_feature_version >= 44))
562 adev->gfx.mec_fw_write_wait = true;
563 break;
564 case CHIP_VEGA20:
565 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
566 (adev->gfx.me_feature_version >= 44) &&
567 (adev->gfx.pfp_fw_version >= 0x000000b2) &&
568 (adev->gfx.pfp_feature_version >= 44))
569 adev->gfx.me_fw_write_wait = true;
570
571 if ((adev->gfx.mec_fw_version >= 0x00000197) &&
572 (adev->gfx.mec_feature_version >= 44))
573 adev->gfx.mec_fw_write_wait = true;
574 break;
575 case CHIP_RAVEN:
576 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
577 (adev->gfx.me_feature_version >= 42) &&
578 (adev->gfx.pfp_fw_version >= 0x000000b1) &&
579 (adev->gfx.pfp_feature_version >= 42))
580 adev->gfx.me_fw_write_wait = true;
581
582 if ((adev->gfx.mec_fw_version >= 0x00000192) &&
583 (adev->gfx.mec_feature_version >= 42))
584 adev->gfx.mec_fw_write_wait = true;
585 break;
586 default:
587 break;
588 }
589}
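/*
 * GFXOFF is left as configured on the Vega parts; on Raven it appears to be
 * disabled unless the RLC firmware/feature version checks below pass, and the
 * GFX powergating flags are only set while GFXOFF remains allowed.
 */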

static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
			break;
		if ((adev->gfx.rlc_fw_version != 106 &&
		     adev->gfx.rlc_fw_version < 531) ||
		    (adev->gfx.rlc_fw_version == 53815) ||
		    (adev->gfx.rlc_feature_version < 1) ||
		    !adev->gfx.rlc.is_rlc_v2_1)
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
	default:
		break;
	}
}

617static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
618{
619 const char *chip_name;
620 char fw_name[30];
621 int err;
622 struct amdgpu_firmware_info *info = NULL;
623 const struct common_firmware_header *header = NULL;
624 const struct gfx_firmware_header_v1_0 *cp_hdr;
625 const struct rlc_firmware_header_v2_0 *rlc_hdr;
626 unsigned int *tmp = NULL;
627 unsigned int i = 0;
628 uint16_t version_major;
629 uint16_t version_minor;
80f41f84 630 uint32_t smu_version;
631
632 DRM_DEBUG("\n");
633
634 switch (adev->asic_type) {
635 case CHIP_VEGA10:
636 chip_name = "vega10";
637 break;
638 case CHIP_VEGA12:
639 chip_name = "vega12";
640 break;
641 case CHIP_VEGA20:
642 chip_name = "vega20";
643 break;
eaa85724 644 case CHIP_RAVEN:
645 if (adev->rev_id >= 8)
646 chip_name = "raven2";
647 else if (adev->pdev->device == 0x15d8)
648 chip_name = "picasso";
649 else
650 chip_name = "raven";
eaa85724 651 break;
652 default:
653 BUG();
654 }
655
656 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
657 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
658 if (err)
659 goto out;
660 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
661 if (err)
662 goto out;
663 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
664 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
665 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
666
667 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
668 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
669 if (err)
670 goto out;
671 err = amdgpu_ucode_validate(adev->gfx.me_fw);
672 if (err)
673 goto out;
674 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
675 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
676 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
677
678 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
679 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
680 if (err)
681 goto out;
682 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
683 if (err)
684 goto out;
685 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
686 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
687 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
688
	/*
	 * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin instead
	 * of picasso_rlc.bin.  How to tell the two apart:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF,
	 * or revision >= 0xD8 && revision <= 0xDF;
	 * otherwise it is PCO FP5.
	 */
	if (!strcmp(chip_name, "picasso") &&
		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		(smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

720 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
721 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
722 adev->gfx.rlc.save_and_restore_offset =
723 le32_to_cpu(rlc_hdr->save_and_restore_offset);
724 adev->gfx.rlc.clear_state_descriptor_offset =
725 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
726 adev->gfx.rlc.avail_scratch_ram_locations =
727 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
728 adev->gfx.rlc.reg_restore_list_size =
729 le32_to_cpu(rlc_hdr->reg_restore_list_size);
730 adev->gfx.rlc.reg_list_format_start =
731 le32_to_cpu(rlc_hdr->reg_list_format_start);
732 adev->gfx.rlc.reg_list_format_separate_start =
733 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
734 adev->gfx.rlc.starting_offsets_start =
735 le32_to_cpu(rlc_hdr->starting_offsets_start);
736 adev->gfx.rlc.reg_list_format_size_bytes =
737 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
738 adev->gfx.rlc.reg_list_size_bytes =
739 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
740 adev->gfx.rlc.register_list_format =
741 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
742 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
743 if (!adev->gfx.rlc.register_list_format) {
744 err = -ENOMEM;
745 goto out;
746 }
747
748 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
749 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
06668916 750 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
751 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
752
753 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
754
755 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
756 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
06668916 757 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
a4d41ad0 758 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
b1023571 759
760 if (adev->gfx.rlc.is_rlc_v2_1)
761 gfx_v9_0_init_rlc_ext_microcode(adev);
762
763 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
764 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
765 if (err)
766 goto out;
767 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
768 if (err)
769 goto out;
770 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
771 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
772 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
773
774
775 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
776 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
777 if (!err) {
778 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
779 if (err)
780 goto out;
781 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
782 adev->gfx.mec2_fw->data;
783 adev->gfx.mec2_fw_version =
784 le32_to_cpu(cp_hdr->header.ucode_version);
785 adev->gfx.mec2_feature_version =
786 le32_to_cpu(cp_hdr->ucode_feature_version);
787 } else {
788 err = 0;
789 adev->gfx.mec2_fw = NULL;
790 }
791
792 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
793 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
794 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
795 info->fw = adev->gfx.pfp_fw;
796 header = (const struct common_firmware_header *)info->fw->data;
797 adev->firmware.fw_size +=
798 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
799
800 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
801 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
802 info->fw = adev->gfx.me_fw;
803 header = (const struct common_firmware_header *)info->fw->data;
804 adev->firmware.fw_size +=
805 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
806
807 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
808 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
809 info->fw = adev->gfx.ce_fw;
810 header = (const struct common_firmware_header *)info->fw->data;
811 adev->firmware.fw_size +=
812 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
813
814 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
815 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
816 info->fw = adev->gfx.rlc_fw;
817 header = (const struct common_firmware_header *)info->fw->data;
818 adev->firmware.fw_size +=
819 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
820
821 if (adev->gfx.rlc.is_rlc_v2_1 &&
822 adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
823 adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
824 adev->gfx.rlc.save_restore_list_srm_size_bytes) {
825 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
826 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
827 info->fw = adev->gfx.rlc_fw;
828 adev->firmware.fw_size +=
829 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
830
831 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
832 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
833 info->fw = adev->gfx.rlc_fw;
834 adev->firmware.fw_size +=
835 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
836
837 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
838 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
839 info->fw = adev->gfx.rlc_fw;
840 adev->firmware.fw_size +=
841 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
842 }
843
844 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
845 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
846 info->fw = adev->gfx.mec_fw;
847 header = (const struct common_firmware_header *)info->fw->data;
848 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
849 adev->firmware.fw_size +=
850 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
851
852 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
853 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
854 info->fw = adev->gfx.mec_fw;
855 adev->firmware.fw_size +=
856 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
857
858 if (adev->gfx.mec2_fw) {
859 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
860 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
861 info->fw = adev->gfx.mec2_fw;
862 header = (const struct common_firmware_header *)info->fw->data;
863 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
864 adev->firmware.fw_size +=
865 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
866 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
867 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
868 info->fw = adev->gfx.mec2_fw;
869 adev->firmware.fw_size +=
870 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
871 }
872
873 }
874
875out:
00544006 876 gfx_v9_0_check_if_need_gfxoff(adev);
39b62541 877 gfx_v9_0_check_fw_write_wait(adev);
878 if (err) {
879 dev_err(adev->dev,
880 "gfx9: Failed to load firmware \"%s\"\n",
881 fw_name);
882 release_firmware(adev->gfx.pfp_fw);
883 adev->gfx.pfp_fw = NULL;
884 release_firmware(adev->gfx.me_fw);
885 adev->gfx.me_fw = NULL;
886 release_firmware(adev->gfx.ce_fw);
887 adev->gfx.ce_fw = NULL;
888 release_firmware(adev->gfx.rlc_fw);
889 adev->gfx.rlc_fw = NULL;
890 release_firmware(adev->gfx.mec_fw);
891 adev->gfx.mec_fw = NULL;
892 release_firmware(adev->gfx.mec2_fw);
893 adev->gfx.mec2_fw = NULL;
894 }
895 return err;
896}
897
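/*
 * The clear-state buffer (CSB) is laid out as: PREAMBLE begin, CONTEXT_CONTROL,
 * one SET_CONTEXT_REG block per extent of the gfx9 clear-state data, PREAMBLE
 * end, and a final CLEAR_STATE packet.  gfx_v9_0_get_csb_size() appears to
 * count the dwords of exactly that layout and gfx_v9_0_get_csb_buffer() emits
 * it into the supplied buffer.
 */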
898static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
899{
900 u32 count = 0;
901 const struct cs_section_def *sect = NULL;
902 const struct cs_extent_def *ext = NULL;
903
904 /* begin clear state */
905 count += 2;
906 /* context control state */
907 count += 3;
908
909 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
910 for (ext = sect->section; ext->extent != NULL; ++ext) {
911 if (sect->id == SECT_CONTEXT)
912 count += 2 + ext->reg_count;
913 else
914 return 0;
915 }
916 }
917
918 /* end clear state */
919 count += 2;
920 /* clear state */
921 count += 2;
922
923 return count;
924}
925
926static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
927 volatile u32 *buffer)
928{
929 u32 count = 0, i;
930 const struct cs_section_def *sect = NULL;
931 const struct cs_extent_def *ext = NULL;
932
933 if (adev->gfx.rlc.cs_data == NULL)
934 return;
935 if (buffer == NULL)
936 return;
937
938 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
939 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
940
941 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
942 buffer[count++] = cpu_to_le32(0x80000000);
943 buffer[count++] = cpu_to_le32(0x80000000);
944
945 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
946 for (ext = sect->section; ext->extent != NULL; ++ext) {
947 if (sect->id == SECT_CONTEXT) {
948 buffer[count++] =
949 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
950 buffer[count++] = cpu_to_le32(ext->reg_index -
951 PACKET3_SET_CONTEXT_REG_START);
952 for (i = 0; i < ext->reg_count; i++)
953 buffer[count++] = cpu_to_le32(ext->extent[i]);
954 } else {
955 return;
956 }
957 }
958 }
959
960 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
961 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
962
963 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
964 buffer[count++] = cpu_to_le32(0);
965}
966
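/*
 * Roughly: walks the CU bitmap for each SE/SH and marks the first N CUs as
 * "always on" (N depends on APU vs. Vega12 vs. other dGPUs), programming
 * RLC_PG_ALWAYS_ON_CU_MASK and RLC_LB_ALWAYS_ACTIVE_CU_MASK along the way.
 */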
967static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
968{
969 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
970 uint32_t pg_always_on_cu_num = 2;
971 uint32_t always_on_cu_num;
972 uint32_t i, j, k;
973 uint32_t mask, cu_bitmap, counter;
974
975 if (adev->flags & AMD_IS_APU)
976 always_on_cu_num = 4;
977 else if (adev->asic_type == CHIP_VEGA12)
978 always_on_cu_num = 8;
979 else
980 always_on_cu_num = 12;
981
982 mutex_lock(&adev->grbm_idx_mutex);
983 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
984 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
985 mask = 1;
986 cu_bitmap = 0;
987 counter = 0;
988 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
989
990 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
991 if (cu_info->bitmap[i][j] & mask) {
992 if (counter == pg_always_on_cu_num)
993 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
994 if (counter < always_on_cu_num)
995 cu_bitmap |= mask;
996 else
997 break;
998 counter++;
999 }
1000 mask <<= 1;
1001 }
1002
1003 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1004 cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1005 }
1006 }
1007 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1008 mutex_unlock(&adev->grbm_idx_mutex);
1009}
1010
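/*
 * RLC load-balancing (LBPW) setup: programs the RLC_LB_THR_CONFIG,
 * RLC_LB_CNTR and RLC_LB_PARAMS thresholds plus the init CU mask across all
 * SE/SH.  The gfx_v9_4 variant below uses slightly different threshold
 * values; both end by programming the always-on CU masks.
 */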
1011static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1012{
e5475e16 1013 uint32_t data;
1014
1015 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1016 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1017 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1018 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1019 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1020
1021 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1022 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1023
1024 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1025 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1026
1027 mutex_lock(&adev->grbm_idx_mutex);
1028 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1029 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1030 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1031
1032 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1033 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1034 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1035 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1036 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1037
1038 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1039 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1040 data &= 0x0000FFFF;
1041 data |= 0x00C00000;
1042 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1043
1044 /*
1045 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1046 * programmed in gfx_v9_0_init_always_on_cu_mask()
1047 */
1048
1049 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1050 * but used for RLC_LB_CNTL configuration */
1051 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1052 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1053 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1054 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1055 mutex_unlock(&adev->grbm_idx_mutex);
1056
1057 gfx_v9_0_init_always_on_cu_mask(adev);
1058}
1059
1060static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1061{
1062 uint32_t data;
1063
1064 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1065 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1066 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1067 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1068 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1069
1070 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1071 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1072
1073 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1074 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1075
1076 mutex_lock(&adev->grbm_idx_mutex);
1077 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1078 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1079 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1080
1081 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1082 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1083 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1084 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1085 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1086
1087 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1088 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1089 data &= 0x0000FFFF;
1090 data |= 0x00C00000;
1091 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1092
1093 /*
1094 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1095 * programmed in gfx_v9_0_init_always_on_cu_mask()
1096 */
1097
1098 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1099 * but used for RLC_LB_CNTL configuration */
1100 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1101 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1102 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1103 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1104 mutex_unlock(&adev->grbm_idx_mutex);
1105
1106 gfx_v9_0_init_always_on_cu_mask(adev);
1107}
1108
1109static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1110{
e5475e16 1111 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1112}
1113
106c7d61 1114static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
c9719c69 1115{
106c7d61 1116 return 5;
1117}
1118
1119static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1120{
1121 const struct cs_section_def *cs_data;
1122 int r;
1123
1124 adev->gfx.rlc.cs_data = gfx9_cs_data;
1125
1126 cs_data = adev->gfx.rlc.cs_data;
1127
1128 if (cs_data) {
1129 /* init clear state block */
1130 r = amdgpu_gfx_rlc_init_csb(adev);
1131 if (r)
a4a02777 1132 return r;
1133 }
1134
741deade 1135 if (adev->asic_type == CHIP_RAVEN) {
1136 /* TODO: double check the cp_table_size for RV */
1137 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1138 r = amdgpu_gfx_rlc_init_cpt(adev);
1139 if (r)
a4a02777 1140 return r;
989b6823 1141 }
ba7bb665 1142
1143 switch (adev->asic_type) {
1144 case CHIP_RAVEN:
ba7bb665 1145 gfx_v9_0_init_lbpw(adev);
1146 break;
1147 case CHIP_VEGA20:
1148 gfx_v9_4_init_lbpw(adev);
1149 break;
1150 default:
1151 break;
1152 }
1153
1154 return 0;
1155}
1156
1157static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1158{
1159 int r;
1160
1161 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1162 if (unlikely(r != 0))
1163 return r;
1164
1165 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1166 AMDGPU_GEM_DOMAIN_VRAM);
1167 if (!r)
1168 adev->gfx.rlc.clear_state_gpu_addr =
1169 amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1170
1171 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1172
1173 return r;
1174}
1175
1176static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1177{
1178 int r;
1179
1180 if (!adev->gfx.rlc.clear_state_obj)
1181 return;
1182
1183 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1184 if (likely(r == 0)) {
1185 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1186 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1187 }
1188}
1189
1190static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1191{
1192 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1193 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1194}
1195
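/*
 * MEC setup: takes ownership of the relevant compute queues, allocates the
 * HPD EOP buffer in VRAM for those queues, and copies the MEC microcode into
 * a GTT buffer object the CP can fetch from.
 */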
1196static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1197{
1198 int r;
1199 u32 *hpd;
1200 const __le32 *fw_data;
1201 unsigned fw_size;
1202 u32 *fw;
42794b27 1203 size_t mec_hpd_size;
1204
1205 const struct gfx_firmware_header_v1_0 *mec_hdr;
1206
1207 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1208
78c16834 1209 /* take ownership of the relevant compute queues */
41f6a99a 1210 amdgpu_gfx_compute_queue_acquire(adev);
78c16834 1211 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
b1023571 1212
a4a02777 1213 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
b44da694 1214 AMDGPU_GEM_DOMAIN_VRAM,
1215 &adev->gfx.mec.hpd_eop_obj,
1216 &adev->gfx.mec.hpd_eop_gpu_addr,
1217 (void **)&hpd);
b1023571 1218 if (r) {
a4a02777 1219 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1220 gfx_v9_0_mec_fini(adev);
1221 return r;
1222 }
1223
1224 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1225
1226 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1227 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1228
1229 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1230
1231 fw_data = (const __le32 *)
1232 (adev->gfx.mec_fw->data +
1233 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1234 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1235
1236 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1237 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1238 &adev->gfx.mec.mec_fw_obj,
1239 &adev->gfx.mec.mec_fw_gpu_addr,
1240 (void **)&fw);
b1023571 1241 if (r) {
a4a02777 1242 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1243 gfx_v9_0_mec_fini(adev);
1244 return r;
1245 }
a4a02777 1246
1247 memcpy(fw, fw_data, fw_size);
1248
1249 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1250 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1251
1252 return 0;
1253}
1254
1255static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1256{
5e78835a 1257 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1258 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1259 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1260 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1261 (SQ_IND_INDEX__FORCE_READ_MASK));
5e78835a 1262 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1263}
1264
1265static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1266 uint32_t wave, uint32_t thread,
1267 uint32_t regno, uint32_t num, uint32_t *out)
1268{
5e78835a 1269 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1270 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1271 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1272 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1273 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1274 (SQ_IND_INDEX__FORCE_READ_MASK) |
1275 (SQ_IND_INDEX__AUTO_INCR_MASK));
1276 while (num--)
5e78835a 1277 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1278}
1279
1280static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1281{
1282 /* type 1 wave data */
1283 dst[(*no_fields)++] = 1;
1284 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1285 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1286 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1287 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1288 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1289 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1290 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1291 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1292 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1293 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1294 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1295 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1296 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1297 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1298}
1299
1300static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1301 uint32_t wave, uint32_t start,
1302 uint32_t size, uint32_t *dst)
1303{
1304 wave_read_regs(
1305 adev, simd, wave, 0,
1306 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1307}
1308
1309static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1310 uint32_t wave, uint32_t thread,
1311 uint32_t start, uint32_t size,
1312 uint32_t *dst)
1313{
1314 wave_read_regs(
1315 adev, simd, wave, thread,
1316 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1317}
b1023571 1318
f7a9ee81 1319static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
0fa4246e 1320 u32 me, u32 pipe, u32 q, u32 vm)
f7a9ee81 1321{
0fa4246e 1322 soc15_grbm_select(adev, me, pipe, q, vm);
1323}
1324
1325static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1326 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1327 .select_se_sh = &gfx_v9_0_select_se_sh,
1328 .read_wave_data = &gfx_v9_0_read_wave_data,
1329 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
822770ad 1330 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
f7a9ee81 1331 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1332};
1333
3251c043 1334static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1335{
1336 u32 gb_addr_config;
3251c043 1337 int err;
1338
1339 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1340
1341 switch (adev->asic_type) {
1342 case CHIP_VEGA10:
b1023571 1343 adev->gfx.config.max_hw_contexts = 8;
1344 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1345 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1346 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1347 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1348 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1349 break;
1350 case CHIP_VEGA12:
1351 adev->gfx.config.max_hw_contexts = 8;
1352 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1353 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1354 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1355 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
62b35f9a 1356 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1357 DRM_INFO("fix gfx.config for vega12\n");
1358 break;
1359 case CHIP_VEGA20:
1360 adev->gfx.config.max_hw_contexts = 8;
1361 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1362 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1363 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1364 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1365 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1366 gb_addr_config &= ~0xf3e777ff;
1367 gb_addr_config |= 0x22014042;
1368 /* check vbios table if gpu info is not available */
1369 err = amdgpu_atomfirmware_get_gfx_info(adev);
1370 if (err)
1371 return err;
d3adedb4 1372 break;
1373 case CHIP_RAVEN:
1374 adev->gfx.config.max_hw_contexts = 8;
1375 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1376 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1377 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1378 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1379 if (adev->rev_id >= 8)
1380 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1381 else
1382 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
5cf7433d 1383 break;
1384 default:
1385 BUG();
1386 break;
1387 }
1388
1389 adev->gfx.config.gb_addr_config = gb_addr_config;
1390
1391 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1392 REG_GET_FIELD(
1393 adev->gfx.config.gb_addr_config,
1394 GB_ADDR_CONFIG,
1395 NUM_PIPES);
1396
1397 adev->gfx.config.max_tile_pipes =
1398 adev->gfx.config.gb_addr_config_fields.num_pipes;
1399
1400 adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1401 REG_GET_FIELD(
1402 adev->gfx.config.gb_addr_config,
1403 GB_ADDR_CONFIG,
1404 NUM_BANKS);
1405 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1406 REG_GET_FIELD(
1407 adev->gfx.config.gb_addr_config,
1408 GB_ADDR_CONFIG,
1409 MAX_COMPRESSED_FRAGS);
1410 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1411 REG_GET_FIELD(
1412 adev->gfx.config.gb_addr_config,
1413 GB_ADDR_CONFIG,
1414 NUM_RB_PER_SE);
1415 adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1416 REG_GET_FIELD(
1417 adev->gfx.config.gb_addr_config,
1418 GB_ADDR_CONFIG,
1419 NUM_SHADER_ENGINES);
1420 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1421 REG_GET_FIELD(
1422 adev->gfx.config.gb_addr_config,
1423 GB_ADDR_CONFIG,
1424 PIPE_INTERLEAVE_SIZE));
1425
1426 return 0;
1427}
1428
1429static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1430 struct amdgpu_ngg_buf *ngg_buf,
1431 int size_se,
1432 int default_size_se)
1433{
1434 int r;
1435
1436 if (size_se < 0) {
1437 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1438 return -EINVAL;
1439 }
1440 size_se = size_se ? size_se : default_size_se;
1441
42ce2243 1442 ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1443 r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1444 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1445 &ngg_buf->bo,
1446 &ngg_buf->gpu_addr,
1447 NULL);
1448 if (r) {
1449 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1450 return r;
1451 }
1452 ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1453
1454 return r;
1455}
1456
1457static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1458{
1459 int i;
1460
1461 for (i = 0; i < NGG_BUF_MAX; i++)
1462 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1463 &adev->gfx.ngg.buf[i].gpu_addr,
1464 NULL);
1465
1466 memset(&adev->gfx.ngg.buf[0], 0,
1467 sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1468
1469 adev->gfx.ngg.init = false;
1470
1471 return 0;
1472}
1473
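/*
 * NGG init: reserve a 64-byte aligned chunk of GDS and create the
 * per-shader-engine Primitive, Position, Control Sideband and (optional)
 * Parameter Cache buffers in VRAM. Skipped when amdgpu_ngg is disabled.
 */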
1474static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1475{
1476 int r;
1477
1478 if (!amdgpu_ngg || adev->gfx.ngg.init == true)
1479 return 0;
1480
1481 /* GDS reserve memory: 64 bytes alignment */
1482 adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
dca29491 1483 adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1484 adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1485 adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1486
1487 /* Primitive Buffer */
af8baf15 1488 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1489 amdgpu_prim_buf_per_se,
1490 64 * 1024);
1491 if (r) {
1492 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1493 goto err;
1494 }
1495
1496 /* Position Buffer */
af8baf15 1497 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1498 amdgpu_pos_buf_per_se,
1499 256 * 1024);
1500 if (r) {
1501 dev_err(adev->dev, "Failed to create Position Buffer\n");
1502 goto err;
1503 }
1504
1505 /* Control Sideband */
af8baf15 1506 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1507 amdgpu_cntl_sb_buf_per_se,
1508 256);
1509 if (r) {
1510 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1511 goto err;
1512 }
1513
1514 /* Parameter Cache, not created by default */
1515 if (amdgpu_param_buf_per_se <= 0)
1516 goto out;
1517
af8baf15 1518 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1519 amdgpu_param_buf_per_se,
1520 512 * 1024);
1521 if (r) {
1522 dev_err(adev->dev, "Failed to create Parameter Cache\n");
1523 goto err;
1524 }
1525
1526out:
1527 adev->gfx.ngg.init = true;
1528 return 0;
1529err:
1530 gfx_v9_0_ngg_fini(adev);
1531 return r;
1532}
1533
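/*
 * NGG enable: program the WD buffer size/base registers from the buffers
 * created above, then clear the reserved GDS window with a CP DMA_DATA
 * packet submitted on the gfx ring.
 */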
1534static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1535{
1536 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1537 int r;
91629eff 1538 u32 data, base;
1539
1540 if (!amdgpu_ngg)
1541 return 0;
1542
1543 /* Program buffer size */
1544 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1545 adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1546 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1547 adev->gfx.ngg.buf[NGG_POS].size >> 8);
5e78835a 1548 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
b1023571 1549
1550 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1551 adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1552 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1553 adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
5e78835a 1554 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1555
1556 /* Program buffer base address */
af8baf15 1557 base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
b1023571 1558 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
5e78835a 1559 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
b1023571 1560
af8baf15 1561 base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
b1023571 1562 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
5e78835a 1563 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
b1023571 1564
af8baf15 1565 base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
b1023571 1566 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
5e78835a 1567 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
b1023571 1568
af8baf15 1569 base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
b1023571 1570 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
5e78835a 1571 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
b1023571 1572
af8baf15 1573 base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
b1023571 1574 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
5e78835a 1575 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
b1023571 1576
af8baf15 1577 base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
b1023571 1578 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
5e78835a 1579 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1580
1581 /* Clear GDS reserved memory */
1582 r = amdgpu_ring_alloc(ring, 17);
1583 if (r) {
1584 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1585 ring->name, r);
1586 return r;
1587 }
1588
1589 gfx_v9_0_write_data_to_reg(ring, 0, false,
946a4d5b 1590 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
dca29491 1591 (adev->gds.gds_size +
77a2faa5 1592 adev->gfx.ngg.gds_reserve_size));
1593
1594 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1595 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
d33bba4d 1596 PACKET3_DMA_DATA_DST_SEL(1) |
1597 PACKET3_DMA_DATA_SRC_SEL(2)));
1598 amdgpu_ring_write(ring, 0);
1599 amdgpu_ring_write(ring, 0);
1600 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1601 amdgpu_ring_write(ring, 0);
1602 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1603 adev->gfx.ngg.gds_reserve_size);
1604
1605 gfx_v9_0_write_data_to_reg(ring, 0, false,
946a4d5b 1606 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1607
1608 amdgpu_ring_commit(ring);
1609
1610 return 0;
1611}
1612
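/*
 * Set up one compute ring: map (mec, pipe, queue) onto the ring, assign its
 * doorbell and EOP buffer slice, and hook it to the matching MEC pipe EOP
 * interrupt source.
 */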
1613static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1614 int mec, int pipe, int queue)
1615{
1616 int r;
1617 unsigned irq_type;
1618 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1619
1620 ring = &adev->gfx.compute_ring[ring_id];
1621
1622 /* mec0 is me1 */
1623 ring->me = mec + 1;
1624 ring->pipe = pipe;
1625 ring->queue = queue;
1626
1627 ring->ring_obj = NULL;
1628 ring->use_doorbell = true;
9564f192 1629 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1630 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1631 + (ring_id * GFX9_MEC_HPD_SIZE);
1632 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1633
1634 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1635 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1636 + ring->pipe;
1637
1638 /* type-2 packets are deprecated on MEC, use type-3 instead */
1639 r = amdgpu_ring_init(adev, ring, 1024,
1640 &adev->gfx.eop_irq, irq_type);
1641 if (r)
1642 return r;
1643
1644
1645 return 0;
1646}
1647
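/*
 * sw_init ordering: register the CP interrupt sources, load the gfx
 * microcode, create the RLC and MEC BOs, then bring up the gfx ring, the
 * compute rings (spread across MEC pipes), the KIQ and the per-queue MQDs.
 */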
1648static int gfx_v9_0_sw_init(void *handle)
1649{
1361f455 1650 int i, j, k, r, ring_id;
b1023571 1651 struct amdgpu_ring *ring;
ac104e99 1652 struct amdgpu_kiq *kiq;
1653 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1654
1655 switch (adev->asic_type) {
1656 case CHIP_VEGA10:
8b399477 1657 case CHIP_VEGA12:
61324ddc 1658 case CHIP_VEGA20:
1659 case CHIP_RAVEN:
1660 adev->gfx.mec.num_mec = 2;
1661 break;
1662 default:
1663 adev->gfx.mec.num_mec = 1;
1664 break;
1665 }
1666
1667 adev->gfx.mec.num_pipe_per_mec = 4;
1668 adev->gfx.mec.num_queue_per_pipe = 8;
1669
b1023571 1670 /* EOP Event */
44a99b65 1671 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1672 if (r)
1673 return r;
1674
1675 /* Privileged reg */
44a99b65 1676 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1677 &adev->gfx.priv_reg_irq);
1678 if (r)
1679 return r;
1680
1681 /* Privileged inst */
44a99b65 1682 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1683 &adev->gfx.priv_inst_irq);
1684 if (r)
1685 return r;
1686
1687 /* ECC error */
1688 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1689 &adev->gfx.cp_ecc_error_irq);
1690 if (r)
1691 return r;
1692
1693 /* FUE error */
1694 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1695 &adev->gfx.cp_ecc_error_irq);
1696 if (r)
1697 return r;
1698
1699 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1700
1701 gfx_v9_0_scratch_init(adev);
1702
1703 r = gfx_v9_0_init_microcode(adev);
1704 if (r) {
1705 DRM_ERROR("Failed to load gfx firmware!\n");
1706 return r;
1707 }
1708
fdb81fd7 1709 r = adev->gfx.rlc.funcs->init(adev);
1710 if (r) {
1711 DRM_ERROR("Failed to init rlc BOs!\n");
1712 return r;
1713 }
1714
1715 r = gfx_v9_0_mec_init(adev);
1716 if (r) {
1717 DRM_ERROR("Failed to init MEC BOs!\n");
1718 return r;
1719 }
1720
1721 /* set up the gfx ring */
1722 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1723 ring = &adev->gfx.gfx_ring[i];
1724 ring->ring_obj = NULL;
1725 if (!i)
1726 sprintf(ring->name, "gfx");
1727 else
1728 sprintf(ring->name, "gfx_%d", i);
b1023571 1729 ring->use_doorbell = true;
9564f192 1730 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
b1023571 1731 r = amdgpu_ring_init(adev, ring, 1024,
53b2fe41 1732 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
1733 if (r)
1734 return r;
1735 }
1736
1737 /* set up the compute queues - allocate horizontally across pipes */
1738 ring_id = 0;
1739 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1740 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1741 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2db0cdbe 1742 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1743 continue;
1744
1745 r = gfx_v9_0_compute_ring_init(adev,
1746 ring_id,
1747 i, k, j);
1748 if (r)
1749 return r;
1750
1751 ring_id++;
1752 }
b1023571 1753 }
1754 }
1755
71c37505 1756 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1757 if (r) {
1758 DRM_ERROR("Failed to init KIQ BOs!\n");
1759 return r;
1760 }
ac104e99 1761
e30a5223 1762 kiq = &adev->gfx.kiq;
71c37505 1763 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1764 if (r)
1765 return r;
464826d6 1766
 1767	/* create MQD for all compute queues as well as KIQ for SRIOV case */
4fc6a88f 1768 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1769 if (r)
1770 return r;
ac104e99 1771
1772 adev->gfx.ce_ram_size = 0x8000;
1773
1774 r = gfx_v9_0_gpu_early_init(adev);
1775 if (r)
1776 return r;
1777
1778 r = gfx_v9_0_ngg_init(adev);
1779 if (r)
1780 return r;
1781
1782 return 0;
1783}
1784
1785
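/*
 * sw_fini tears down roughly in reverse order of sw_init: RAS state, rings,
 * MQDs, KIQ, MEC/NGG buffers, RLC clear-state BO and finally the microcode.
 */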
1786static int gfx_v9_0_sw_fini(void *handle)
1787{
1788 int i;
1789 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1790
1791 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1792 adev->gfx.ras_if) {
1793 struct ras_common_if *ras_if = adev->gfx.ras_if;
1794 struct ras_ih_if ih_info = {
1795 .head = *ras_if,
1796 };
1797
1798 amdgpu_ras_debugfs_remove(adev, ras_if);
1799 amdgpu_ras_sysfs_remove(adev, ras_if);
1800 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
1801 amdgpu_ras_feature_enable(adev, ras_if, 0);
1802 kfree(ras_if);
1803 }
1804
1805 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1806 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1807 for (i = 0; i < adev->gfx.num_compute_rings; i++)
1808 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1809
4fc6a88f 1810 amdgpu_gfx_mqd_sw_fini(adev);
1811 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1812 amdgpu_gfx_kiq_fini(adev);
ac104e99 1813
1814 gfx_v9_0_mec_fini(adev);
1815 gfx_v9_0_ngg_fini(adev);
789142eb 1816 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
741deade 1817 if (adev->asic_type == CHIP_RAVEN) {
1818 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1819 &adev->gfx.rlc.cp_table_gpu_addr,
1820 (void **)&adev->gfx.rlc.cp_table_ptr);
1821 }
c833d8aa 1822 gfx_v9_0_free_microcode(adev);
1823
1824 return 0;
1825}
1826
1827
1828static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1829{
1830 /* TODO */
1831}
1832
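/*
 * Select which SE/SH/instance subsequent indexed register accesses target;
 * passing 0xffffffff for an argument requests broadcast writes to all
 * instances of that unit.
 */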
1833static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1834{
be448a4d 1835 u32 data;
b1023571 1836
1837 if (instance == 0xffffffff)
1838 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1839 else
1840 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1841
1842 if (se_num == 0xffffffff)
b1023571 1843 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
be448a4d 1844 else
b1023571 1845 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1846
1847 if (sh_num == 0xffffffff)
1848 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1849 else
b1023571 1850 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
be448a4d 1851
1bff7f6c 1852 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1853}
1854
1855static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1856{
1857 u32 data, mask;
1858
1859 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1860 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1861
1862 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1863 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1864
1865 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1866 adev->gfx.config.max_sh_per_se);
1867
1868 return (~data) & mask;
1869}
1870
1871static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1872{
1873 int i, j;
2572c24c 1874 u32 data;
1875 u32 active_rbs = 0;
1876 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1877 adev->gfx.config.max_sh_per_se;
1878
1879 mutex_lock(&adev->grbm_idx_mutex);
1880 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1881 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1882 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1883 data = gfx_v9_0_get_rb_active_bitmap(adev);
1884 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1885 rb_bitmap_width_per_sh);
1886 }
1887 }
1888 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1889 mutex_unlock(&adev->grbm_idx_mutex);
1890
1891 adev->gfx.config.backend_enable_mask = active_rbs;
2572c24c 1892 adev->gfx.config.num_rbs = hweight32(active_rbs);
1893}
1894
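/*
 * Give compute VMIDs 8..15 fixed non-zero SH_MEM_BASES apertures (layout in
 * the comment below) and clear their GDS, GWS and OA allocations.
 */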
1895#define DEFAULT_SH_MEM_BASES (0x6000)
1896#define FIRST_COMPUTE_VMID (8)
1897#define LAST_COMPUTE_VMID (16)
1898static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1899{
1900 int i;
1901 uint32_t sh_mem_config;
1902 uint32_t sh_mem_bases;
1903
1904 /*
1905 * Configure apertures:
1906 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
1907 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
1908 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
1909 */
1910 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1911
1912 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1913 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
eaa05d52 1914 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1915
1916 mutex_lock(&adev->srbm_mutex);
1917 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1918 soc15_grbm_select(adev, 0, 0, 0, i);
1919 /* CP and shaders */
1920 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1921 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1922 }
1923 soc15_grbm_select(adev, 0, 0, 0, 0);
1924 mutex_unlock(&adev->srbm_mutex);
1925
1926 /* Initialize all compute VMIDs to have no GDS, GWS, or OA
 1927	   access. These should be enabled by FW for target VMIDs. */
1928 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1929 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
1930 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
1931 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
1932 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
1933 }
1934}
1935
434e6df2 1936static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1937{
1938 u32 tmp;
1939 int i;
1940
1bff7f6c 1941 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1942
1943 gfx_v9_0_tiling_mode_table_init(adev);
1944
1945 gfx_v9_0_setup_rb(adev);
1946 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
5eeae247 1947 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1948
1949 /* XXX SH_MEM regs */
1950 /* where to put LDS, scratch, GPUVM in FSA64 space */
1951 mutex_lock(&adev->srbm_mutex);
32b646b2 1952 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1953 soc15_grbm_select(adev, 0, 0, 0, i);
1954 /* CP and shaders */
1955 if (i == 0) {
1956 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1957 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1958 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
1959 !!amdgpu_noretry);
1960 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1961 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
1962 } else {
1963 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1964 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1965 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
1966 !!amdgpu_noretry);
bdb50274 1967 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1968 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1969 (adev->gmc.private_aperture_start >> 48));
1970 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1971 (adev->gmc.shared_aperture_start >> 48));
bdb50274 1972 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
a7ea6548 1973 }
1974 }
1975 soc15_grbm_select(adev, 0, 0, 0, 0);
1976
1977 mutex_unlock(&adev->srbm_mutex);
1978
1979 gfx_v9_0_init_compute_vmid(adev);
1980}
1981
1982static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1983{
1984 u32 i, j, k;
1985 u32 mask;
1986
1987 mutex_lock(&adev->grbm_idx_mutex);
1988 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1989 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1990 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1991 for (k = 0; k < adev->usec_timeout; k++) {
5e78835a 1992 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1993 break;
1994 udelay(1);
1995 }
1366b2d0 1996 if (k == adev->usec_timeout) {
1997 gfx_v9_0_select_se_sh(adev, 0xffffffff,
1998 0xffffffff, 0xffffffff);
1999 mutex_unlock(&adev->grbm_idx_mutex);
2000 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2001 i, j);
2002 return;
2003 }
2004 }
2005 }
2006 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2007 mutex_unlock(&adev->grbm_idx_mutex);
2008
2009 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2010 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2011 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2012 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2013 for (k = 0; k < adev->usec_timeout; k++) {
5e78835a 2014 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2015 break;
2016 udelay(1);
2017 }
2018}
2019
2020static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2021 bool enable)
2022{
5e78835a 2023 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
b1023571 2024
2025 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2026 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2027 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2028 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2029
5e78835a 2030 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2031}
2032
2033static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2034{
2035 /* csib */
1bff7f6c 2036 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
6bce4667 2037 adev->gfx.rlc.clear_state_gpu_addr >> 32);
1bff7f6c 2038 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
6bce4667 2039 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1bff7f6c 2040 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2041 adev->gfx.rlc.clear_state_size);
2042}
2043
727b888f 2044static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2045 int indirect_offset,
2046 int list_size,
2047 int *unique_indirect_regs,
cb5ed37f 2048 int unique_indirect_reg_count,
6bce4667 2049 int *indirect_start_offsets,
2050 int *indirect_start_offsets_count,
2051 int max_start_offsets_count)
2052{
2053 int idx;
2054
2055 for (; indirect_offset < list_size; indirect_offset++) {
cb5ed37f 2056 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2057 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2058 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
6bce4667 2059
2060 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2061 indirect_offset += 2;
6bce4667 2062
 2063			/* look for the matching index */
cb5ed37f 2064 for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2065 if (unique_indirect_regs[idx] ==
2066 register_list_format[indirect_offset] ||
2067 !unique_indirect_regs[idx])
2068 break;
2069 }
6bce4667 2070
cb5ed37f 2071 BUG_ON(idx >= unique_indirect_reg_count);
6bce4667 2072
2073 if (!unique_indirect_regs[idx])
2074 unique_indirect_regs[idx] = register_list_format[indirect_offset];
6bce4667 2075
727b888f 2076 indirect_offset++;
6bce4667 2077 }
2078 }
2079}
2080
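/*
 * Build the RLC save/restore list from the firmware register list: write the
 * direct restore table into SRM ARAM, the register-list format and starting
 * offsets into RLC scratch, and the unique indirect registers into the SRM
 * index control registers.
 */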
727b888f 2081static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2082{
2083 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2084 int unique_indirect_reg_count = 0;
2085
2086 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2087 int indirect_start_offsets_count = 0;
2088
2089 int list_size = 0;
727b888f 2090 int i = 0, j = 0;
2091 u32 tmp = 0;
2092
2093 u32 *register_list_format =
2094 kmemdup(adev->gfx.rlc.register_list_format,
2095 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2096 if (!register_list_format)
2097 return -ENOMEM;
2098
2099 /* setup unique_indirect_regs array and indirect_start_offsets array */
2100 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2101 gfx_v9_1_parse_ind_reg_list(register_list_format,
2102 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2103 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2104 unique_indirect_regs,
cb5ed37f 2105 unique_indirect_reg_count,
727b888f 2106 indirect_start_offsets,
2107 &indirect_start_offsets_count,
2108 ARRAY_SIZE(indirect_start_offsets));
2109
2110 /* enable auto inc in case it is disabled */
2111 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2112 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2113 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2114
2115 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2116 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2117 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2118 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2119 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2120 adev->gfx.rlc.register_restore[i]);
2121
2122 /* load indirect register */
2123 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2124 adev->gfx.rlc.reg_list_format_start);
2125
2126 /* direct register portion */
2127 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2128 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2129 register_list_format[i]);
2130
2131 /* indirect register portion */
2132 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2133 if (register_list_format[i] == 0xFFFFFFFF) {
2134 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2135 continue;
2136 }
2137
2138 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2139 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2140
2141 for (j = 0; j < unique_indirect_reg_count; j++) {
2142 if (register_list_format[i] == unique_indirect_regs[j]) {
2143 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2144 break;
2145 }
2146 }
2147
2148 BUG_ON(j >= unique_indirect_reg_count);
2149
2150 i++;
2151 }
2152
2153 /* set save/restore list size */
2154 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2155 list_size = list_size >> 1;
2156 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2157 adev->gfx.rlc.reg_restore_list_size);
2158 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2159
2160 /* write the starting offsets to RLC scratch ram */
2161 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2162 adev->gfx.rlc.starting_offsets_start);
c1b24a14 2163 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
6bce4667 2164 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
727b888f 2165 indirect_start_offsets[i]);
2166
2167 /* load unique indirect regs*/
c1b24a14 2168 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2169 if (unique_indirect_regs[i] != 0) {
2170 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2171 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2172 unique_indirect_regs[i] & 0x3FFFF);
2173
2174 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2175 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2176 unique_indirect_regs[i] >> 20);
2177 }
2178 }
2179
2180 kfree(register_list_format);
2181 return 0;
2182}
2183
2184static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2185{
0e5293d0 2186 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2187}
2188
2189static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2190 bool enable)
2191{
2192 uint32_t data = 0;
2193 uint32_t default_data = 0;
2194
2195 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2196 if (enable == true) {
2197 /* enable GFXIP control over CGPG */
2198 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2199 if(default_data != data)
2200 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2201
2202 /* update status */
2203 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2204 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2205 if(default_data != data)
2206 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2207 } else {
2208 /* restore GFXIP control over GCPG */
2209 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2210 if(default_data != data)
2211 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2212 }
2213}
2214
2215static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2216{
2217 uint32_t data = 0;
2218
2219 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2220 AMD_PG_SUPPORT_GFX_SMG |
2221 AMD_PG_SUPPORT_GFX_DMG)) {
2222 /* init IDLE_POLL_COUNT = 60 */
2223 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2224 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2225 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2226 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2227
2228 /* init RLC PG Delay */
2229 data = 0;
2230 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2231 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2232 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2233 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2234 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2235
2236 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2237 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2238 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2239 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2240
2241 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2242 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2243 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2244 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2245
2246 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2247 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2248
2249 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2250 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2251 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2252
2253 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2254 }
2255}
2256
2257static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2258 bool enable)
2259{
2260 uint32_t data = 0;
2261 uint32_t default_data = 0;
2262
2263 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2264 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2265 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2266 enable ? 1 : 0);
2267 if (default_data != data)
2268 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2269}
2270
2271static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2272 bool enable)
2273{
2274 uint32_t data = 0;
2275 uint32_t default_data = 0;
2276
2277 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2278 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2279 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2280 enable ? 1 : 0);
2281 if(default_data != data)
2282 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2283}
2284
2285static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2286 bool enable)
2287{
2288 uint32_t data = 0;
2289 uint32_t default_data = 0;
2290
2291 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2292 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2293 CP_PG_DISABLE,
2294 enable ? 0 : 1);
2295 if(default_data != data)
2296 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2297}
2298
2299static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2300 bool enable)
2301{
2302 uint32_t data, default_data;
2303
2304 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2305 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2306 GFX_POWER_GATING_ENABLE,
2307 enable ? 1 : 0);
2308 if(default_data != data)
2309 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2310}
2311
2312static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2313 bool enable)
2314{
2315 uint32_t data, default_data;
2316
2317 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2318 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2319 GFX_PIPELINE_PG_ENABLE,
2320 enable ? 1 : 0);
2321 if(default_data != data)
2322 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2323
2324 if (!enable)
2325 /* read any GFX register to wake up GFX */
2326 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2327}
2328
552c8f76 2329static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2330 bool enable)
2331{
2332 uint32_t data, default_data;
2333
2334 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2335 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2336 STATIC_PER_CU_PG_ENABLE,
2337 enable ? 1 : 0);
2338 if(default_data != data)
2339 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2340}
2341
552c8f76 2342static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2343 bool enable)
2344{
2345 uint32_t data, default_data;
2346
2347 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2348 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2349 DYN_PER_CU_PG_ENABLE,
2350 enable ? 1 : 0);
2351 if(default_data != data)
2352 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2353}
2354
2355static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2356{
2357 gfx_v9_0_init_csb(adev);
2358
2359 /*
2360 * Rlc save restore list is workable since v2_1.
2361 * And it's needed by gfxoff feature.
2362 */
2363 if (adev->gfx.rlc.is_rlc_v2_1) {
2364 gfx_v9_1_init_rlc_save_restore_list(adev);
2365 gfx_v9_0_enable_save_restore_machine(adev);
2366 }
a5acf930 2367
2368 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2369 AMD_PG_SUPPORT_GFX_SMG |
2370 AMD_PG_SUPPORT_GFX_DMG |
2371 AMD_PG_SUPPORT_CP |
2372 AMD_PG_SUPPORT_GDS |
2373 AMD_PG_SUPPORT_RLC_SMU_HS)) {
2374 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2375 adev->gfx.rlc.cp_table_gpu_addr >> 8);
2376 gfx_v9_0_init_gfx_power_gating(adev);
2377 }
2378}
2379
2380void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2381{
b08796ce 2382 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
b1023571 2383 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2384 gfx_v9_0_wait_for_rlc_serdes(adev);
2385}
2386
2387static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2388{
596c8e8b 2389 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
b1023571 2390 udelay(50);
596c8e8b 2391 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2392 udelay(50);
2393}
2394
2395static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2396{
2397#ifdef AMDGPU_RLC_DEBUG_RETRY
2398 u32 rlc_ucode_ver;
2399#endif
b1023571 2400
342cda25 2401 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
ad97d9de 2402 udelay(50);
2403
 2404	/* carrizo: enable the cp interrupt only after the cp has been initialized */
ad97d9de 2405 if (!(adev->flags & AMD_IS_APU)) {
b1023571 2406 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
ad97d9de 2407 udelay(50);
2408 }
2409
2410#ifdef AMDGPU_RLC_DEBUG_RETRY
2411 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
5e78835a 2412 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2413 if(rlc_ucode_ver == 0x108) {
2414 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2415 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2416 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2417 * default is 0x9C4 to create a 100us interval */
5e78835a 2418 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
b1023571 2419 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
eaa05d52 2420 * to disable the page fault retry interrupts, default is
b1023571 2421 * 0x100 (256) */
5e78835a 2422 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2423 }
2424#endif
2425}
2426
2427static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2428{
2429 const struct rlc_firmware_header_v2_0 *hdr;
2430 const __le32 *fw_data;
2431 unsigned i, fw_size;
2432
2433 if (!adev->gfx.rlc_fw)
2434 return -EINVAL;
2435
2436 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2437 amdgpu_ucode_print_rlc_hdr(&hdr->header);
2438
2439 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2440 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2441 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2442
5e78835a 2443 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2444 RLCG_UCODE_LOADING_START_ADDRESS);
2445 for (i = 0; i < fw_size; i++)
2446 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2447 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2448
2449 return 0;
2450}
2451
2452static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2453{
2454 int r;
2455
2456 if (amdgpu_sriov_vf(adev)) {
2457 gfx_v9_0_init_csb(adev);
cfee05bc 2458 return 0;
f840cc5f 2459 }
cfee05bc 2460
fdb81fd7 2461 adev->gfx.rlc.funcs->stop(adev);
2462
2463 /* disable CG */
5e78835a 2464 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
b1023571 2465
2466 gfx_v9_0_init_pg(adev);
2467
2468 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2469 /* legacy rlc firmware loading */
2470 r = gfx_v9_0_rlc_load_microcode(adev);
2471 if (r)
2472 return r;
2473 }
2474
2475 switch (adev->asic_type) {
2476 case CHIP_RAVEN:
2477 if (amdgpu_lbpw == 0)
2478 gfx_v9_0_enable_lbpw(adev, false);
2479 else
2480 gfx_v9_0_enable_lbpw(adev, true);
2481 break;
2482 case CHIP_VEGA20:
2483 if (amdgpu_lbpw > 0)
2484 gfx_v9_0_enable_lbpw(adev, true);
2485 else
2486 gfx_v9_0_enable_lbpw(adev, false);
2487 break;
2488 default:
2489 break;
2490 }
2491
fdb81fd7 2492 adev->gfx.rlc.funcs->start(adev);
2493
2494 return 0;
2495}
2496
2497static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2498{
2499 int i;
5e78835a 2500 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
b1023571 2501
2502 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2503 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2504 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2505 if (!enable) {
b1023571 2506 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
c66ed765 2507 adev->gfx.gfx_ring[i].sched.ready = false;
b1023571 2508 }
1bff7f6c 2509 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2510 udelay(50);
2511}
2512
2513static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2514{
2515 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2516 const struct gfx_firmware_header_v1_0 *ce_hdr;
2517 const struct gfx_firmware_header_v1_0 *me_hdr;
2518 const __le32 *fw_data;
2519 unsigned i, fw_size;
2520
2521 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2522 return -EINVAL;
2523
2524 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2525 adev->gfx.pfp_fw->data;
2526 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2527 adev->gfx.ce_fw->data;
2528 me_hdr = (const struct gfx_firmware_header_v1_0 *)
2529 adev->gfx.me_fw->data;
2530
2531 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2532 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2533 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2534
2535 gfx_v9_0_cp_gfx_enable(adev, false);
2536
2537 /* PFP */
2538 fw_data = (const __le32 *)
2539 (adev->gfx.pfp_fw->data +
2540 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2541 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
5e78835a 2542 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
b1023571 2543 for (i = 0; i < fw_size; i++)
2544 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2545 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2546
2547 /* CE */
2548 fw_data = (const __le32 *)
2549 (adev->gfx.ce_fw->data +
2550 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2551 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
5e78835a 2552 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
b1023571 2553 for (i = 0; i < fw_size; i++)
2554 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2555 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2556
2557 /* ME */
2558 fw_data = (const __le32 *)
2559 (adev->gfx.me_fw->data +
2560 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2561 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
5e78835a 2562 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
b1023571 2563 for (i = 0; i < fw_size; i++)
2564 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2565 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2566
2567 return 0;
2568}
2569
2570static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2571{
2572 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2573 const struct cs_section_def *sect = NULL;
2574 const struct cs_extent_def *ext = NULL;
d5de797f 2575 int r, i, tmp;
2576
2577 /* init the CP */
2578 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2579 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2580
2581 gfx_v9_0_cp_gfx_enable(adev, true);
2582
d5de797f 2583 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2584 if (r) {
2585 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2586 return r;
2587 }
2588
2589 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2590 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2591
2592 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2593 amdgpu_ring_write(ring, 0x80000000);
2594 amdgpu_ring_write(ring, 0x80000000);
2595
2596 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2597 for (ext = sect->section; ext->extent != NULL; ++ext) {
2598 if (sect->id == SECT_CONTEXT) {
2599 amdgpu_ring_write(ring,
2600 PACKET3(PACKET3_SET_CONTEXT_REG,
2601 ext->reg_count));
2602 amdgpu_ring_write(ring,
2603 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2604 for (i = 0; i < ext->reg_count; i++)
2605 amdgpu_ring_write(ring, ext->extent[i]);
2606 }
2607 }
2608 }
2609
2610 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2611 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2612
2613 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2614 amdgpu_ring_write(ring, 0);
2615
2616 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2617 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2618 amdgpu_ring_write(ring, 0x8000);
2619 amdgpu_ring_write(ring, 0x8000);
2620
2621 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
2622 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2623 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2624 amdgpu_ring_write(ring, tmp);
2625 amdgpu_ring_write(ring, 0);
2626
2627 amdgpu_ring_commit(ring);
2628
2629 return 0;
2630}
2631
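/*
 * Program the gfx ring buffer: size, rptr/wptr writeback addresses, ring
 * base and doorbell range, then start the CP via gfx_v9_0_cp_gfx_start()
 * and mark the ring ready.
 */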
2632static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2633{
2634 struct amdgpu_ring *ring;
2635 u32 tmp;
2636 u32 rb_bufsz;
3fc08b61 2637 u64 rb_addr, rptr_addr, wptr_gpu_addr;
2638
2639 /* Set the write pointer delay */
5e78835a 2640 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2641
2642 /* set the RB to use vmid 0 */
5e78835a 2643 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2644
2645 /* Set ring buffer size */
2646 ring = &adev->gfx.gfx_ring[0];
2647 rb_bufsz = order_base_2(ring->ring_size / 8);
2648 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2649 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2650#ifdef __BIG_ENDIAN
2651 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2652#endif
5e78835a 2653 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2654
2655 /* Initialize the ring buffer's write pointers */
2656 ring->wptr = 0;
2657 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2658 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2659
 2660	/* set the wb address whether it's enabled or not */
2661 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2662 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2663 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
b1023571 2664
3fc08b61 2665 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2666 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2667 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3fc08b61 2668
b1023571 2669 mdelay(1);
5e78835a 2670 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2671
2672 rb_addr = ring->gpu_addr >> 8;
2673 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2674 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
b1023571 2675
5e78835a 2676 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2677 if (ring->use_doorbell) {
2678 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2679 DOORBELL_OFFSET, ring->doorbell_index);
2680 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2681 DOORBELL_EN, 1);
2682 } else {
2683 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2684 }
5e78835a 2685 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2686
2687 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2688 DOORBELL_RANGE_LOWER, ring->doorbell_index);
5e78835a 2689 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
b1023571 2690
5e78835a 2691 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2692 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2693
2694
2695 /* start the ring */
2696 gfx_v9_0_cp_gfx_start(adev);
c66ed765 2697 ring->sched.ready = true;
2698
2699 return 0;
2700}
2701
2702static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2703{
2704 int i;
2705
2706 if (enable) {
1bff7f6c 2707 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
b1023571 2708 } else {
1bff7f6c 2709 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2710 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2711 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2712 adev->gfx.compute_ring[i].sched.ready = false;
2713 adev->gfx.kiq.ring.sched.ready = false;
2714 }
2715 udelay(50);
2716}
2717
2718static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2719{
2720 const struct gfx_firmware_header_v1_0 *mec_hdr;
2721 const __le32 *fw_data;
2722 unsigned i;
2723 u32 tmp;
2724
2725 if (!adev->gfx.mec_fw)
2726 return -EINVAL;
2727
2728 gfx_v9_0_cp_compute_enable(adev, false);
2729
2730 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2731 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2732
2733 fw_data = (const __le32 *)
2734 (adev->gfx.mec_fw->data +
2735 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2736 tmp = 0;
2737 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2738 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
5e78835a 2739 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
b1023571 2740
5e78835a 2741 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
b1023571 2742 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
5e78835a 2743 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
b1023571 2744 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
eaa05d52 2745
b1023571 2746 /* MEC1 */
5e78835a 2747 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2748 mec_hdr->jt_offset);
2749 for (i = 0; i < mec_hdr->jt_size; i++)
5e78835a 2750 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2751 le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2752
5e78835a 2753 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2754 adev->gfx.mec_fw_version);
2755 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2756
2757 return 0;
2758}
2759
2760/* KIQ functions */
2761static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
b1023571 2762{
2763 uint32_t tmp;
2764 struct amdgpu_device *adev = ring->adev;
b1023571 2765
464826d6 2766 /* tell RLC which is KIQ queue */
5e78835a 2767 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2768 tmp &= 0xffffff00;
2769 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
1bff7f6c 2770 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
464826d6 2771 tmp |= 0x80;
1bff7f6c 2772 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
464826d6 2773}
b1023571 2774
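/*
 * Map all enabled compute queues through the KIQ: one SET_RESOURCES packet
 * carrying the queue mask, followed by a MAP_QUEUES packet per compute
 * ring, then a ring test to confirm the KIQ processed them.
 */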
0f1dfd52 2775static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
464826d6 2776{
bd3402ea 2777 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
de65513a 2778 uint64_t queue_mask = 0;
2fdde9fa 2779 int r, i;
b1023571 2780
2781 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2782 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2783 continue;
b1023571 2784
2785 /* This situation may be hit in the future if a new HW
2786 * generation exposes more than 64 queues. If so, the
2787 * definition of queue_mask needs updating */
1d11ee89 2788 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2789 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2790 break;
b1023571 2791 }
b1023571 2792
2793 queue_mask |= (1ull << i);
2794 }
b1023571 2795
841cf911 2796 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2797 if (r) {
2798 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
b1023571 2799 return r;
2fdde9fa 2800 }
b1023571 2801
2802 /* set resources */
2803 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2804 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2805 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
2806 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
2807 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
2808 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
2809 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
2810 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
2811 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
2812 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2813 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2814 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2815 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2816
2817 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2818 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
2819 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
2820 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2821 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2822 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2823 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2824 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2825 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
f4534f06 2826 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2827 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2828 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2829 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2830 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2831 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2832 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2833 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2834 }
b1023571 2835
2836 r = amdgpu_ring_test_helper(kiq_ring);
2837 if (r)
841cf911 2838 DRM_ERROR("KCQ enable failed\n");
464826d6 2839
2fdde9fa 2840 return r;
2841}
2842
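/*
 * Fill the v9 MQD for a compute queue from the ring state: EOP buffer,
 * doorbell control, MQD/HQD base addresses, queue size and rptr/wptr
 * writeback addresses, leaving the queue marked active.
 */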
e322edc3 2843static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
464826d6 2844{
33fb8698 2845 struct amdgpu_device *adev = ring->adev;
e322edc3 2846 struct v9_mqd *mqd = ring->mqd_ptr;
2847 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2848 uint32_t tmp;
2849
2850 mqd->header = 0xC0310800;
2851 mqd->compute_pipelinestat_enable = 0x00000001;
2852 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2853 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2854 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2855 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2856 mqd->compute_misc_reserved = 0x00000003;
2857
2858 mqd->dynamic_cu_mask_addr_lo =
2859 lower_32_bits(ring->mqd_gpu_addr
2860 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2861 mqd->dynamic_cu_mask_addr_hi =
2862 upper_32_bits(ring->mqd_gpu_addr
2863 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2864
d72f2f46 2865 eop_base_addr = ring->eop_gpu_addr >> 8;
2866 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2867 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2868
2869 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
5e78835a 2870 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
464826d6 2871 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
268cb4c7 2872 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2873
2874 mqd->cp_hqd_eop_control = tmp;
2875
2876 /* enable doorbell? */
5e78835a 2877 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2878
2879 if (ring->use_doorbell) {
2880 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2881 DOORBELL_OFFSET, ring->doorbell_index);
2882 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2883 DOORBELL_EN, 1);
2884 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2885 DOORBELL_SOURCE, 0);
2886 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2887 DOORBELL_HIT, 0);
78888cff 2888 } else {
2889 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2890 DOORBELL_EN, 0);
78888cff 2891 }
2892
2893 mqd->cp_hqd_pq_doorbell_control = tmp;
2894
2895 /* disable the queue if it's active */
2896 ring->wptr = 0;
2897 mqd->cp_hqd_dequeue_request = 0;
2898 mqd->cp_hqd_pq_rptr = 0;
2899 mqd->cp_hqd_pq_wptr_lo = 0;
2900 mqd->cp_hqd_pq_wptr_hi = 0;
2901
2902 /* set the pointer to the MQD */
2903 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2904 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2905
2906 /* set MQD vmid to 0 */
5e78835a 2907 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2908 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2909 mqd->cp_mqd_control = tmp;
2910
2911 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
2912 hqd_gpu_addr = ring->gpu_addr >> 8;
2913 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2914 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2915
2916 /* set up the HQD, this is similar to CP_RB0_CNTL */
5e78835a 2917 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2918 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2919 (order_base_2(ring->ring_size / 4) - 1));
2920 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2921 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2922#ifdef __BIG_ENDIAN
2923 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2924#endif
2925 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2926 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2927 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2928 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2929 mqd->cp_hqd_pq_control = tmp;
2930
2931 /* set the wb address whether it's enabled or not */
2932 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2933 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2934 mqd->cp_hqd_pq_rptr_report_addr_hi =
2935 upper_32_bits(wb_gpu_addr) & 0xffff;
2936
2937 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2938 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2939 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2940 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2941
2942 tmp = 0;
2943 /* enable the doorbell if requested */
2944 if (ring->use_doorbell) {
5e78835a 2945 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2946 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2947 DOORBELL_OFFSET, ring->doorbell_index);
2948
2949 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2950 DOORBELL_EN, 1);
2951 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2952 DOORBELL_SOURCE, 0);
2953 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2954 DOORBELL_HIT, 0);
2955 }
2956
2957 mqd->cp_hqd_pq_doorbell_control = tmp;
2958
2959 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2960 ring->wptr = 0;
0274a9c5 2961 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2962
2963 /* set the vmid for the queue */
2964 mqd->cp_hqd_vmid = 0;
2965
0274a9c5 2966 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2967 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2968 mqd->cp_hqd_persistent_state = tmp;
2969
2970 /* set MIN_IB_AVAIL_SIZE */
2971 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2972 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2973 mqd->cp_hqd_ib_control = tmp;
2974
2975 /* activate the queue */
2976 mqd->cp_hqd_active = 1;
2977
2978 return 0;
2979}
2980
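/*
 * Program the values prepared in the MQD into the CP_HQD_* registers for
 * the KIQ. Callers select the queue with soc15_grbm_select() and hold
 * srbm_mutex around this call.
 */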
e322edc3 2981static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
464826d6 2982{
33fb8698 2983 struct amdgpu_device *adev = ring->adev;
e322edc3 2984 struct v9_mqd *mqd = ring->mqd_ptr;
2985 int j;
2986
2987 /* disable wptr polling */
72edadd5 2988 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
464826d6 2989
1bff7f6c 2990 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
464826d6 2991 mqd->cp_hqd_eop_base_addr_lo);
1bff7f6c 2992 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2993 mqd->cp_hqd_eop_base_addr_hi);
2994
2995 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
1bff7f6c 2996 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2997 mqd->cp_hqd_eop_control);
2998
2999 /* enable doorbell? */
1bff7f6c 3000 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3001 mqd->cp_hqd_pq_doorbell_control);
3002
3003 /* disable the queue if it's active */
5e78835a 3004 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
1bff7f6c 3005 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
464826d6 3006 for (j = 0; j < adev->usec_timeout; j++) {
5e78835a 3007 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3008 break;
3009 udelay(1);
3010 }
1bff7f6c 3011 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
464826d6 3012 mqd->cp_hqd_dequeue_request);
1bff7f6c 3013 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
464826d6 3014 mqd->cp_hqd_pq_rptr);
1bff7f6c 3015 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
464826d6 3016 mqd->cp_hqd_pq_wptr_lo);
1bff7f6c 3017 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3018 mqd->cp_hqd_pq_wptr_hi);
3019 }
3020
3021 /* set the pointer to the MQD */
1bff7f6c 3022 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
464826d6 3023 mqd->cp_mqd_base_addr_lo);
1bff7f6c 3024 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3025 mqd->cp_mqd_base_addr_hi);
3026
3027 /* set MQD vmid to 0 */
1bff7f6c 3028 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3029 mqd->cp_mqd_control);
3030
 3031 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
1bff7f6c 3032 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
464826d6 3033 mqd->cp_hqd_pq_base_lo);
1bff7f6c 3034 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3035 mqd->cp_hqd_pq_base_hi);
3036
3037 /* set up the HQD, this is similar to CP_RB0_CNTL */
1bff7f6c 3038 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3039 mqd->cp_hqd_pq_control);
3040
3041 /* set the wb address whether it's enabled or not */
1bff7f6c 3042 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
464826d6 3043 mqd->cp_hqd_pq_rptr_report_addr_lo);
1bff7f6c 3044 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3045 mqd->cp_hqd_pq_rptr_report_addr_hi);
3046
3047 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
1bff7f6c 3048 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
464826d6 3049 mqd->cp_hqd_pq_wptr_poll_addr_lo);
1bff7f6c 3050 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3051 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3052
3053 /* enable the doorbell if requested */
3054 if (ring->use_doorbell) {
5e78835a 3055 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
9564f192 3056 (adev->doorbell_index.kiq * 2) << 2);
5e78835a 3057 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
9564f192 3058 (adev->doorbell_index.userqueue_end * 2) << 2);
3059 }
3060
bdb50274 3061 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3062 mqd->cp_hqd_pq_doorbell_control);
3063
3064 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
1bff7f6c 3065 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
464826d6 3066 mqd->cp_hqd_pq_wptr_lo);
1bff7f6c 3067 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3068 mqd->cp_hqd_pq_wptr_hi);
3069
3070 /* set the vmid for the queue */
1bff7f6c 3071 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
464826d6 3072
1bff7f6c 3073 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3074 mqd->cp_hqd_persistent_state);
3075
3076 /* activate the queue */
1bff7f6c 3077 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3078 mqd->cp_hqd_active);
3079
3080 if (ring->use_doorbell)
3081 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3082
3083 return 0;
3084}
3085
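/*
 * Deactivate the KIQ: request a dequeue, wait up to adev->usec_timeout for
 * CP_HQD_ACTIVE to clear (forcing it clear if the request times out), then
 * reset the remaining HQD registers.
 */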
3086static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3087{
3088 struct amdgpu_device *adev = ring->adev;
3089 int j;
3090
3091 /* disable the queue if it's active */
3092 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3093
1bff7f6c 3094 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3095
3096 for (j = 0; j < adev->usec_timeout; j++) {
3097 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3098 break;
3099 udelay(1);
3100 }
3101
f7a9ee81 3102 if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3103 DRM_DEBUG("KIQ dequeue request failed.\n");
3104
f7a9ee81 3105 /* Manual disable if dequeue request times out */
1bff7f6c 3106 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3107 }
3108
1bff7f6c 3109 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3110 0);
3111 }
3112
3113 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3114 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3115 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3116 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3117 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3118 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3119 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3120 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3121
3122 return 0;
3123}
3124
e322edc3 3125static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3126{
3127 struct amdgpu_device *adev = ring->adev;
e322edc3 3128 struct v9_mqd *mqd = ring->mqd_ptr;
3129 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3130
898b7893 3131 gfx_v9_0_kiq_setting(ring);
464826d6 3132
13a752e3 3133 if (adev->in_gpu_reset) { /* for GPU_RESET case */
464826d6 3134 /* reset MQD to a clean status */
0ef376ca 3135 if (adev->gfx.mec.mqd_backup[mqd_idx])
ffe6d881 3136 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3137
3138 /* reset ring buffer */
3139 ring->wptr = 0;
b98724db 3140 amdgpu_ring_clear_ring(ring);
3141
3142 mutex_lock(&adev->srbm_mutex);
3143 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3144 gfx_v9_0_kiq_init_register(ring);
3145 soc15_grbm_select(adev, 0, 0, 0, 0);
3146 mutex_unlock(&adev->srbm_mutex);
464826d6 3147 } else {
3148 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3149 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3150 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3151 mutex_lock(&adev->srbm_mutex);
3152 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3153 gfx_v9_0_mqd_init(ring);
3154 gfx_v9_0_kiq_init_register(ring);
3155 soc15_grbm_select(adev, 0, 0, 0, 0);
3156 mutex_unlock(&adev->srbm_mutex);
3157
3158 if (adev->gfx.mec.mqd_backup[mqd_idx])
ffe6d881 3159 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3160 }
3161
0f1dfd52 3162 return 0;
3163}
3164
3165static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3166{
3167 struct amdgpu_device *adev = ring->adev;
3168 struct v9_mqd *mqd = ring->mqd_ptr;
3169 int mqd_idx = ring - &adev->gfx.compute_ring[0];
898b7893 3170
44779b43 3171 if (!adev->in_gpu_reset && !adev->in_suspend) {
3172 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3173 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3174 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3175 mutex_lock(&adev->srbm_mutex);
3176 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
e322edc3 3177 gfx_v9_0_mqd_init(ring);
3178 soc15_grbm_select(adev, 0, 0, 0, 0);
3179 mutex_unlock(&adev->srbm_mutex);
3180
898b7893 3181 if (adev->gfx.mec.mqd_backup[mqd_idx])
ffe6d881 3182 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
13a752e3 3183 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
464826d6 3184 /* reset MQD to a clean status */
898b7893 3185 if (adev->gfx.mec.mqd_backup[mqd_idx])
ffe6d881 3186 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3187
3188 /* reset ring buffer */
3189 ring->wptr = 0;
898b7893 3190 amdgpu_ring_clear_ring(ring);
3191 } else {
3192 amdgpu_ring_clear_ring(ring);
3193 }
3194
3195 return 0;
3196}
3197
3198static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3199{
3200 struct amdgpu_ring *ring;
3201 int r;
3202
3203 ring = &adev->gfx.kiq.ring;
3204
3205 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3206 if (unlikely(r != 0))
a9a8a788 3207 return r;
3208
3209 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3210 if (unlikely(r != 0))
3211 return r;
3212
3213 gfx_v9_0_kiq_init_queue(ring);
3214 amdgpu_bo_kunmap(ring->mqd_obj);
3215 ring->mqd_ptr = NULL;
e1d53aa8 3216 amdgpu_bo_unreserve(ring->mqd_obj);
c66ed765 3217 ring->sched.ready = true;
3218 return 0;
3219}
3220
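/*
 * Bring up the compute queues: enable the compute CP, initialize each
 * KCQ's MQD, then map them all in one batch through the KIQ.
 */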
3221static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3222{
3223 struct amdgpu_ring *ring = NULL;
3224 int r = 0, i;
3225
3226 gfx_v9_0_cp_compute_enable(adev, true);
3227
3228 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3229 ring = &adev->gfx.compute_ring[i];
3230
3231 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3232 if (unlikely(r != 0))
3233 goto done;
3234 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3235 if (!r) {
898b7893 3236 r = gfx_v9_0_kcq_init_queue(ring);
3237 amdgpu_bo_kunmap(ring->mqd_obj);
3238 ring->mqd_ptr = NULL;
464826d6 3239 }
3240 amdgpu_bo_unreserve(ring->mqd_obj);
3241 if (r)
3242 goto done;
3243 }
3244
0f1dfd52 3245 r = gfx_v9_0_kiq_kcq_enable(adev);
3246done:
3247 return r;
3248}
3249
3250static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3251{
bd3402ea 3252 int r, i;
3253 struct amdgpu_ring *ring;
3254
3255 if (!(adev->flags & AMD_IS_APU))
3256 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3257
3258 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3259 /* legacy firmware loading */
3260 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3261 if (r)
3262 return r;
3263
3264 r = gfx_v9_0_cp_compute_load_microcode(adev);
3265 if (r)
3266 return r;
3267 }
3268
3269 r = gfx_v9_0_kiq_resume(adev);
3270 if (r)
3271 return r;
3272
3273 r = gfx_v9_0_cp_gfx_resume(adev);
3274 if (r)
3275 return r;
3276
a9a8a788 3277 r = gfx_v9_0_kcq_resume(adev);
3278 if (r)
3279 return r;
3280
3281 ring = &adev->gfx.gfx_ring[0];
3282 r = amdgpu_ring_test_helper(ring);
3283 if (r)
b1023571 3284 return r;
e30a5223 3285
3286 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3287 ring = &adev->gfx.compute_ring[i];
c66ed765 3288 amdgpu_ring_test_helper(ring);
3289 }
3290
3291 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3292
3293 return 0;
3294}
3295
3296static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3297{
3298 gfx_v9_0_cp_gfx_enable(adev, enable);
3299 gfx_v9_0_cp_compute_enable(adev, enable);
3300}
3301
3302static int gfx_v9_0_hw_init(void *handle)
3303{
3304 int r;
3305 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3306
3307 gfx_v9_0_init_golden_registers(adev);
3308
434e6df2 3309 gfx_v9_0_constants_init(adev);
b1023571 3310
3311 r = gfx_v9_0_csb_vram_pin(adev);
3312 if (r)
3313 return r;
3314
fdb81fd7 3315 r = adev->gfx.rlc.funcs->resume(adev);
3316 if (r)
3317 return r;
3318
3319 r = gfx_v9_0_cp_resume(adev);
3320 if (r)
3321 return r;
3322
3323 r = gfx_v9_0_ngg_en(adev);
3324 if (r)
3325 return r;
3326
3327 return r;
3328}
3329
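/*
 * Unmap all compute queues by submitting one UNMAP_QUEUES (RESET_QUEUES
 * action) packet per KCQ on the KIQ ring, then wait for the KIQ to
 * process them via a ring test.
 */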
ffabea84 3330static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
85f95ad6 3331{
3332 int r, i;
3333 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
85f95ad6 3334
3335 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3336 if (r)
85f95ad6 3337 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
85f95ad6 3338
3339 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3340 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3341
3342 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3343 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3344 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3345 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3346 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3347 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3348 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3349 amdgpu_ring_write(kiq_ring, 0);
3350 amdgpu_ring_write(kiq_ring, 0);
3351 amdgpu_ring_write(kiq_ring, 0);
3352 }
c66ed765 3353 r = amdgpu_ring_test_helper(kiq_ring);
3354 if (r)
3355 DRM_ERROR("KCQ disable failed\n");
3356
3357 return r;
3358}
3359
3360static int gfx_v9_0_hw_fini(void *handle)
3361{
3362 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3363
760a1d55 3364 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3365 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3366 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3367
 3368 	/* disable KCQ to avoid the CPC touching memory that is no longer valid */
ffabea84 3369 gfx_v9_0_kcq_disable(adev);
85f95ad6 3370
464826d6 3371 if (amdgpu_sriov_vf(adev)) {
3372 gfx_v9_0_cp_gfx_enable(adev, false);
 3373 		/* must disable polling for SRIOV when hw finished, otherwise
 3374 		 * the CPC engine may keep fetching a WB address that is no
 3375 		 * longer valid after sw teardown and trigger DMAR read errors
 3376 		 * on the hypervisor side.
 3377 		 */
3378 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3379 return 0;
3380 }
3381
3382 /* Use deinitialize sequence from CAIL when unbinding device from driver,
3383 * otherwise KIQ is hanging when binding back
3384 */
44779b43 3385 if (!adev->in_gpu_reset && !adev->in_suspend) {
3386 mutex_lock(&adev->srbm_mutex);
3387 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3388 adev->gfx.kiq.ring.pipe,
3389 adev->gfx.kiq.ring.queue, 0);
3390 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3391 soc15_grbm_select(adev, 0, 0, 0, 0);
3392 mutex_unlock(&adev->srbm_mutex);
3393 }
3394
b1023571 3395 gfx_v9_0_cp_enable(adev, false);
fdb81fd7 3396 adev->gfx.rlc.funcs->stop(adev);
b1023571 3397
3398 gfx_v9_0_csb_vram_unpin(adev);
3399
3400 return 0;
3401}
3402
3403static int gfx_v9_0_suspend(void *handle)
3404{
44779b43 3405 return gfx_v9_0_hw_fini(handle);
3406}
3407
3408static int gfx_v9_0_resume(void *handle)
3409{
44779b43 3410 return gfx_v9_0_hw_init(handle);
3411}
3412
3413static bool gfx_v9_0_is_idle(void *handle)
3414{
3415 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3416
5e78835a 3417 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3418 GRBM_STATUS, GUI_ACTIVE))
3419 return false;
3420 else
3421 return true;
3422}
3423
3424static int gfx_v9_0_wait_for_idle(void *handle)
3425{
3426 unsigned i;
3427 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3428
3429 for (i = 0; i < adev->usec_timeout; i++) {
2b9bdfa7 3430 if (gfx_v9_0_is_idle(handle))
3431 return 0;
3432 udelay(1);
3433 }
3434 return -ETIMEDOUT;
3435}
3436
3437static int gfx_v9_0_soft_reset(void *handle)
3438{
3439 u32 grbm_soft_reset = 0;
3440 u32 tmp;
3441 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3442
3443 /* GRBM_STATUS */
5e78835a 3444 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3445 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3446 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3447 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3448 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3449 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3450 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3451 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3452 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3453 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3454 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3455 }
3456
3457 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3458 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3459 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3460 }
3461
3462 /* GRBM_STATUS2 */
5e78835a 3463 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3464 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3465 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3466 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3467
3468
75bac5c6 3469 if (grbm_soft_reset) {
b1023571 3470 /* stop the rlc */
fdb81fd7 3471 adev->gfx.rlc.funcs->stop(adev);
3472
3473 /* Disable GFX parsing/prefetching */
3474 gfx_v9_0_cp_gfx_enable(adev, false);
3475
3476 /* Disable MEC parsing/prefetching */
3477 gfx_v9_0_cp_compute_enable(adev, false);
3478
3479 if (grbm_soft_reset) {
5e78835a 3480 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3481 tmp |= grbm_soft_reset;
3482 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3483 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3484 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3485
3486 udelay(50);
3487
3488 tmp &= ~grbm_soft_reset;
3489 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3490 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3491 }
3492
3493 /* Wait a little for things to settle down */
3494 udelay(50);
3495 }
3496 return 0;
3497}
3498
3499static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3500{
3501 uint64_t clock;
3502
3503 mutex_lock(&adev->gfx.gpu_clock_mutex);
3504 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3505 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3506 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3507 mutex_unlock(&adev->gfx.gpu_clock_mutex);
3508 return clock;
3509}
3510
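/*
 * Emit the per-VMID GDS base/size, GWS and OA allocations for this ring
 * as register writes via gfx_v9_0_write_data_to_reg().
 */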
3511static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3512 uint32_t vmid,
3513 uint32_t gds_base, uint32_t gds_size,
3514 uint32_t gws_base, uint32_t gws_size,
3515 uint32_t oa_base, uint32_t oa_size)
3516{
3517 struct amdgpu_device *adev = ring->adev;
3518
3519 /* GDS Base */
3520 gfx_v9_0_write_data_to_reg(ring, 0, false,
946a4d5b 3521 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3522 gds_base);
3523
3524 /* GDS Size */
3525 gfx_v9_0_write_data_to_reg(ring, 0, false,
946a4d5b 3526 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3527 gds_size);
3528
3529 /* GWS */
3530 gfx_v9_0_write_data_to_reg(ring, 0, false,
946a4d5b 3531 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3532 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3533
3534 /* OA */
3535 gfx_v9_0_write_data_to_reg(ring, 0, false,
946a4d5b 3536 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3537 (1 << (oa_size + oa_base)) - (1 << oa_base));
3538}
3539
3540static const u32 vgpr_init_compute_shader[] =
3541{
3542 0xb07c0000, 0xbe8000ff,
3543 0x000000f8, 0xbf110800,
3544 0x7e000280, 0x7e020280,
3545 0x7e040280, 0x7e060280,
3546 0x7e080280, 0x7e0a0280,
3547 0x7e0c0280, 0x7e0e0280,
3548 0x80808800, 0xbe803200,
3549 0xbf84fff5, 0xbf9c0000,
3550 0xd28c0001, 0x0001007f,
3551 0xd28d0001, 0x0002027e,
3552 0x10020288, 0xb8810904,
3553 0xb7814000, 0xd1196a01,
3554 0x00000301, 0xbe800087,
3555 0xbefc00c1, 0xd89c4000,
3556 0x00020201, 0xd89cc080,
3557 0x00040401, 0x320202ff,
3558 0x00000800, 0x80808100,
3559 0xbf84fff8, 0x7e020280,
3560 0xbf810000, 0x00000000,
3561};
3562
3563static const u32 sgpr_init_compute_shader[] =
3564{
3565 0xb07c0000, 0xbe8000ff,
3566 0x0000005f, 0xbee50080,
3567 0xbe812c65, 0xbe822c65,
3568 0xbe832c65, 0xbe842c65,
3569 0xbe852c65, 0xb77c0005,
3570 0x80808500, 0xbf84fff8,
3571 0xbe800080, 0xbf810000,
3572};
3573
3574static const struct soc15_reg_entry vgpr_init_regs[] = {
3575 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3576 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3577 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3578 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3579 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3580 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3581 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3582 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
 3583 	{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
3584 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
3585};
3586
3587static const struct soc15_reg_entry sgpr_init_regs[] = {
3588 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3589 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3590 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3591 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3592 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3593 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3594 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3595 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3596 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3597 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3598};
3599
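/*
 * EDC/ECC error counter registers cleared by the GPR workaround below;
 * the per-entry counts are used as the SE and instance loop bounds when
 * the registers are read back to clear them.
 */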
3600static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3601 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
3602 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
3603 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
3604 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
3605 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
3606 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
3607 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
3608 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
3609 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
3610 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
3611 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
3612 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
3613 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
3614 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
3615 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
3616 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
3617 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
3618 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
3619 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
3620 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
3621 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
3622 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
3623 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
3624 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
3625 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
3626 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
3627 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
3628 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
3629 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
3630 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
3631 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
3632 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
3633};
3634
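/*
 * GDS ECC workaround: set up the VMID0 GDS aperture, issue a DMA_DATA
 * packet covering the full GDS size from the first compute ring, and
 * busy-wait until the ring drains.
 */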
3635static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
3636{
3637 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
eb03e795 3638 int i, r;
df0a8064 3639
eb03e795 3640 r = amdgpu_ring_alloc(ring, 7);
3641 if (r) {
3642 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
3643 ring->name, r);
3644 return r;
3645 }
3646
3647 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
3648 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
3649
3650 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3651 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
3652 PACKET3_DMA_DATA_DST_SEL(1) |
3653 PACKET3_DMA_DATA_SRC_SEL(2) |
3654 PACKET3_DMA_DATA_ENGINE(0)));
3655 amdgpu_ring_write(ring, 0);
3656 amdgpu_ring_write(ring, 0);
3657 amdgpu_ring_write(ring, 0);
3658 amdgpu_ring_write(ring, 0);
3659 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
3660 adev->gds.gds_size);
3661
3662 amdgpu_ring_commit(ring);
3663
3664 for (i = 0; i < adev->usec_timeout; i++) {
3665 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
3666 break;
3667 udelay(1);
3668 }
3669
3670 if (i >= adev->usec_timeout)
3671 r = -ETIMEDOUT;
3672
3673 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
df0a8064 3674
3675 return r;
3676}
df0a8064 3677
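/*
 * GPR ECC workaround: build an IB that dispatches the VGPR and SGPR init
 * compute shaders, wait for it to complete, then read back the EDC
 * counter registers above to clear them. Skipped unless GFX RAS is
 * supported and the first compute ring is ready.
 */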
3678static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3679{
3680 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3681 struct amdgpu_ib ib;
3682 struct dma_fence *f = NULL;
052af915 3683 int r, i, j, k;
3684 unsigned total_size, vgpr_offset, sgpr_offset;
3685 u64 gpu_addr;
3686
3687 /* only support when RAS is enabled */
3688 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3689 return 0;
3690
3691 /* bail if the compute ring is not ready */
3692 if (!ring->sched.ready)
3693 return 0;
3694
3695 total_size =
3696 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3697 total_size +=
3698 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3699 total_size = ALIGN(total_size, 256);
3700 vgpr_offset = total_size;
3701 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3702 sgpr_offset = total_size;
3703 total_size += sizeof(sgpr_init_compute_shader);
3704
3705 /* allocate an indirect buffer to put the commands in */
3706 memset(&ib, 0, sizeof(ib));
3707 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3708 if (r) {
3709 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3710 return r;
3711 }
3712
3713 /* load the compute shaders */
3714 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3715 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3716
3717 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3718 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3719
3720 /* init the ib length to 0 */
3721 ib.length_dw = 0;
3722
3723 /* VGPR */
3724 /* write the register state for the compute dispatch */
3725 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3726 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3727 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3728 - PACKET3_SET_SH_REG_START;
3729 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3730 }
3731 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3732 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3733 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3734 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3735 - PACKET3_SET_SH_REG_START;
3736 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3737 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3738
3739 /* write dispatch packet */
3740 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3741 ib.ptr[ib.length_dw++] = 128; /* x */
3742 ib.ptr[ib.length_dw++] = 1; /* y */
3743 ib.ptr[ib.length_dw++] = 1; /* z */
3744 ib.ptr[ib.length_dw++] =
3745 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3746
3747 /* write CS partial flush packet */
3748 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3749 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3750
3751 /* SGPR */
3752 /* write the register state for the compute dispatch */
3753 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3754 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3755 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3756 - PACKET3_SET_SH_REG_START;
3757 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3758 }
3759 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3760 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3761 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3762 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3763 - PACKET3_SET_SH_REG_START;
3764 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3765 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3766
3767 /* write dispatch packet */
3768 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3769 ib.ptr[ib.length_dw++] = 128; /* x */
3770 ib.ptr[ib.length_dw++] = 1; /* y */
3771 ib.ptr[ib.length_dw++] = 1; /* z */
3772 ib.ptr[ib.length_dw++] =
3773 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3774
3775 /* write CS partial flush packet */
3776 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3777 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3778
 3779 	/* schedule the ib on the ring */
3780 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3781 if (r) {
3782 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3783 goto fail;
3784 }
3785
3786 /* wait for the GPU to finish processing the IB */
3787 r = dma_fence_wait(f, false);
3788 if (r) {
3789 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3790 goto fail;
3791 }
3792
3793 /* read back registers to clear the counters */
3794 mutex_lock(&adev->grbm_idx_mutex);
3795 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
3796 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
3797 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
3798 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
3799 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3800 }
3801 }
3802 }
3803 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3804 mutex_unlock(&adev->grbm_idx_mutex);
3805
3806fail:
3807 amdgpu_ib_free(adev, &ib, NULL);
3808 dma_fence_put(f);
3809
3810 return r;
3811}
3812
3813static int gfx_v9_0_early_init(void *handle)
3814{
3815 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3816
3817 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
78c16834 3818 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3819 gfx_v9_0_set_ring_funcs(adev);
3820 gfx_v9_0_set_irq_funcs(adev);
3821 gfx_v9_0_set_gds_init(adev);
3822 gfx_v9_0_set_rlc_funcs(adev);
3823
3824 return 0;
3825}
3826
3827static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3828 struct amdgpu_iv_entry *entry);
3829
3830static int gfx_v9_0_ecc_late_init(void *handle)
3831{
3832 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3833 struct ras_common_if **ras_if = &adev->gfx.ras_if;
3834 struct ras_ih_if ih_info = {
3835 .cb = gfx_v9_0_process_ras_data_cb,
3836 };
3837 struct ras_fs_if fs_info = {
3838 .sysfs_name = "gfx_err_count",
3839 .debugfs_name = "gfx_err_inject",
3840 };
3841 struct ras_common_if ras_block = {
3842 .block = AMDGPU_RAS_BLOCK__GFX,
3843 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3844 .sub_block_index = 0,
3845 .name = "gfx",
3846 };
3847 int r;
3848
3849 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
a170d49d 3850 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3851 return 0;
3852 }
3853
3854 r = gfx_v9_0_do_edc_gds_workarounds(adev);
3855 if (r)
3856 return r;
3857
3858 /* requires IBs so do in late init after IB pool is initialized */
3859 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3860 if (r)
3861 return r;
3862
2c2fc0cd 3863 /* handle resume path. */
3864 if (*ras_if) {
3865 /* resend ras TA enable cmd during resume.
3866 * prepare to handle failure.
3867 */
3868 ih_info.head = **ras_if;
3869 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3870 if (r) {
3871 if (r == -EAGAIN) {
3872 /* request a gpu reset. will run again. */
3873 amdgpu_ras_request_reset_on_boot(adev,
3874 AMDGPU_RAS_BLOCK__GFX);
3875 return 0;
3876 }
3877 /* fail to enable ras, cleanup all. */
3878 goto irq;
3879 }
3880 /* enable successfully. continue. */
acbbee01 3881 goto resume;
2c2fc0cd 3882 }
acbbee01 3883
3884 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3885 if (!*ras_if)
3886 return -ENOMEM;
3887
3888 **ras_if = ras_block;
3889
a170d49d 3890 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
70ab8c61 3891 if (r) {
3892 if (r == -EAGAIN) {
3893 amdgpu_ras_request_reset_on_boot(adev,
3894 AMDGPU_RAS_BLOCK__GFX);
3895 r = 0;
3896 }
760a1d55 3897 goto feature;
70ab8c61 3898 }
3899
3900 ih_info.head = **ras_if;
3901 fs_info.head = **ras_if;
3902
3903 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3904 if (r)
3905 goto interrupt;
3906
450f30ea 3907 amdgpu_ras_debugfs_create(adev, &fs_info);
3908
3909 r = amdgpu_ras_sysfs_create(adev, &fs_info);
3910 if (r)
3911 goto sysfs;
acbbee01 3912resume:
3913 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3914 if (r)
3915 goto irq;
3916
3917 return 0;
3918irq:
3919 amdgpu_ras_sysfs_remove(adev, *ras_if);
3920sysfs:
3921 amdgpu_ras_debugfs_remove(adev, *ras_if);
3922 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3923interrupt:
3924 amdgpu_ras_feature_enable(adev, *ras_if, 0);
3925feature:
3926 kfree(*ras_if);
3927 *ras_if = NULL;
70ab8c61 3928 return r;
3929}
3930
3931static int gfx_v9_0_late_init(void *handle)
3932{
3933 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3934 int r;
3935
3936 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3937 if (r)
3938 return r;
3939
3940 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3941 if (r)
3942 return r;
3943
3944 r = gfx_v9_0_ecc_late_init(handle);
3945 if (r)
3946 return r;
3947
3948 return 0;
3949}
3950
106c7d61 3951static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
b1023571 3952{
106c7d61 3953 uint32_t rlc_setting;
3954
3955 /* if RLC is not enabled, do nothing */
5e78835a 3956 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
b1023571 3957 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
106c7d61 3958 return false;
b1023571 3959
106c7d61 3960 return true;
3961}
3962
106c7d61 3963static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
b1023571 3964{
3965 uint32_t data;
3966 unsigned i;
b1023571 3967
3968 data = RLC_SAFE_MODE__CMD_MASK;
3969 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3970 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
b1023571 3971
3972 /* wait for RLC_SAFE_MODE */
3973 for (i = 0; i < adev->usec_timeout; i++) {
3974 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3975 break;
3976 udelay(1);
3977 }
3978}
3979
3980static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3981{
3982 uint32_t data;
3983
3984 data = RLC_SAFE_MODE__CMD_MASK;
3985 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3986}
3987
3988static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3989 bool enable)
3990{
106c7d61 3991 amdgpu_gfx_rlc_enter_safe_mode(adev);
3992
3993 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3994 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3995 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3996 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3997 } else {
3998 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3999 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4000 }
4001
106c7d61 4002 amdgpu_gfx_rlc_exit_safe_mode(adev);
4003}
4004
4005static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4006 bool enable)
4007{
4008 /* TODO: double check if we need to perform under safe mode */
4009 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4010
4011 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4012 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4013 else
4014 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4015
4016 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4017 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4018 else
4019 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4020
4021 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4022}
4023
4024static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4025 bool enable)
4026{
4027 uint32_t data, def;
4028
4029 amdgpu_gfx_rlc_enter_safe_mode(adev);
4030
4031 /* It is disabled by HW by default */
4032 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4033 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
5e78835a 4034 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4035
4036 if (adev->asic_type != CHIP_VEGA12)
4037 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4038
4039 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4040 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4041 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4042
4043 /* only for Vega10 & Raven1 */
4044 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4045
4046 if (def != data)
5e78835a 4047 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4048
4049 /* MGLS is a global flag to control all MGLS in GFX */
4050 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4051 /* 2 - RLC memory Light sleep */
4052 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5e78835a 4053 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4054 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4055 if (def != data)
5e78835a 4056 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4057 }
4058 /* 3 - CP memory Light sleep */
4059 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5e78835a 4060 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4061 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4062 if (def != data)
5e78835a 4063 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4064 }
4065 }
4066 } else {
4067 /* 1 - MGCG_OVERRIDE */
5e78835a 4068 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4069
4070 if (adev->asic_type != CHIP_VEGA12)
4071 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4072
4073 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4074 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4075 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4076 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
c3693768 4077
b1023571 4078 if (def != data)
5e78835a 4079 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4080
4081 /* 2 - disable MGLS in RLC */
5e78835a 4082 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4083 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4084 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5e78835a 4085 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4086 }
4087
4088 /* 3 - disable MGLS in CP */
5e78835a 4089 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4090 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4091 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5e78835a 4092 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4093 }
4094 }
4095
4096 amdgpu_gfx_rlc_exit_safe_mode(adev);
4097}
4098
4099static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4100 bool enable)
4101{
4102 uint32_t data, def;
4103
106c7d61 4104 amdgpu_gfx_rlc_enter_safe_mode(adev);
4105
4106 /* Enable 3D CGCG/CGLS */
4107 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4108 /* write cmd to clear cgcg/cgls ov */
5e78835a 4109 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4110 /* unset CGCG override */
4111 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4112 /* update CGCG and CGLS override bits */
4113 if (def != data)
5e78835a 4114 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4115
4116 /* enable 3Dcgcg FSM(0x0000363f) */
5e78835a 4117 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4118
4119 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4120 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4121 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4122 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4123 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4124 if (def != data)
5e78835a 4125 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4126
4127 /* set IDLE_POLL_COUNT(0x00900100) */
5e78835a 4128 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4129 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4130 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4131 if (def != data)
5e78835a 4132 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4133 } else {
4134 /* Disable CGCG/CGLS */
5e78835a 4135 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4136 /* disable cgcg, cgls should be disabled */
4137 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4138 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4139 /* disable cgcg and cgls in FSM */
4140 if (def != data)
5e78835a 4141 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4142 }
4143
106c7d61 4144 amdgpu_gfx_rlc_exit_safe_mode(adev);
4145}
4146
4147static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4148 bool enable)
4149{
4150 uint32_t def, data;
4151
106c7d61 4152 amdgpu_gfx_rlc_enter_safe_mode(adev);
4153
4154 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5e78835a 4155 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4156 /* unset CGCG override */
4157 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4158 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4159 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4160 else
4161 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4162 /* update CGCG and CGLS override bits */
4163 if (def != data)
5e78835a 4164 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
b1023571 4165
a5aedc2d 4166 /* enable cgcg FSM(0x0000363F) */
5e78835a 4167 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
a5aedc2d
EQ
4168
4169 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
b1023571
KW
4170 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4171 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4172 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4173 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4174 if (def != data)
5e78835a 4175 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4176
4177 /* set IDLE_POLL_COUNT(0x00900100) */
5e78835a 4178 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4179 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4180 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4181 if (def != data)
5e78835a 4182 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
b1023571 4183 } else {
5e78835a 4184 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4185 /* reset CGCG/CGLS bits */
4186 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4187 /* disable cgcg and cgls in FSM */
4188 if (def != data)
5e78835a 4189 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4190 }
4191
106c7d61 4192 amdgpu_gfx_rlc_exit_safe_mode(adev);
4193}
4194
4195static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4196 bool enable)
4197{
4198 if (enable) {
4199 /* CGCG/CGLS should be enabled after MGCG/MGLS
4200 * === MGCG + MGLS ===
4201 */
4202 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4203 /* === CGCG /CGLS for GFX 3D Only === */
4204 gfx_v9_0_update_3d_clock_gating(adev, enable);
4205 /* === CGCG + CGLS === */
4206 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4207 } else {
4208 /* CGCG/CGLS should be disabled before MGCG/MGLS
4209 * === CGCG + CGLS ===
4210 */
4211 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4212 /* === CGCG /CGLS for GFX 3D Only === */
4213 gfx_v9_0_update_3d_clock_gating(adev, enable);
4214 /* === MGCG + MGLS === */
4215 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4216 }
4217 return 0;
4218}
4219
4220static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4221 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4222 .set_safe_mode = gfx_v9_0_set_safe_mode,
4223 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
fdb81fd7 4224 .init = gfx_v9_0_rlc_init,
4225 .get_csb_size = gfx_v9_0_get_csb_size,
4226 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4227 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4228 .resume = gfx_v9_0_rlc_resume,
4229 .stop = gfx_v9_0_rlc_stop,
4230 .reset = gfx_v9_0_rlc_reset,
4231 .start = gfx_v9_0_rlc_start
4232};
4233
4234static int gfx_v9_0_set_powergating_state(void *handle,
4235 enum amd_powergating_state state)
4236{
5897c99e 4237 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
197f95c8 4238 bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
4239
4240 switch (adev->asic_type) {
4241 case CHIP_RAVEN:
4242 if (!enable) {
4243 amdgpu_gfx_off_ctrl(adev, false);
4244 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4245 }
4246 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4247 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4248 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4249 } else {
4250 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4251 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4252 }
4253
4254 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4255 gfx_v9_0_enable_cp_power_gating(adev, true);
4256 else
4257 gfx_v9_0_enable_cp_power_gating(adev, false);
4258
4259 /* update gfx cgpg state */
4260 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4261
4262 /* update mgcg state */
4263 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
9134c6d7 4264
4265 if (enable)
4266 amdgpu_gfx_off_ctrl(adev, true);
991a6b32
EQ
4267 break;
4268 case CHIP_VEGA12:
4269 if (!enable) {
4270 amdgpu_gfx_off_ctrl(adev, false);
4271 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4272 } else {
4273 amdgpu_gfx_off_ctrl(adev, true);
4274 }
4275 break;
4276 default:
4277 break;
4278 }
4279
4280 return 0;
4281}
4282
4283static int gfx_v9_0_set_clockgating_state(void *handle,
4284 enum amd_clockgating_state state)
4285{
4286 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4287
4288 if (amdgpu_sriov_vf(adev))
4289 return 0;
4290
4291 switch (adev->asic_type) {
4292 case CHIP_VEGA10:
23862464 4293 case CHIP_VEGA12:
28b576b2 4294 case CHIP_VEGA20:
a4dc61f5 4295 case CHIP_RAVEN:
4296 gfx_v9_0_update_gfx_clock_gating(adev,
4297 state == AMD_CG_STATE_GATE ? true : false);
4298 break;
4299 default:
4300 break;
4301 }
4302 return 0;
4303}
4304
4305static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4306{
4307 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4308 int data;
4309
4310 if (amdgpu_sriov_vf(adev))
4311 *flags = 0;
4312
4313 /* AMD_CG_SUPPORT_GFX_MGCG */
5e78835a 4314 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4315 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4316 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4317
4318 /* AMD_CG_SUPPORT_GFX_CGCG */
5e78835a 4319 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4320 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4321 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4322
4323 /* AMD_CG_SUPPORT_GFX_CGLS */
4324 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4325 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4326
4327 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5e78835a 4328 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4329 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4330 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4331
4332 /* AMD_CG_SUPPORT_GFX_CP_LS */
5e78835a 4333 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4334 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4335 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4336
4337 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5e78835a 4338 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4339 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4340 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4341
4342 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4343 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4344 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4345}
4346
4347static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4348{
4349 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
4350}
4351
4352static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4353{
4354 struct amdgpu_device *adev = ring->adev;
4355 u64 wptr;
4356
4357 /* XXX check if swapping is necessary on BE */
4358 if (ring->use_doorbell) {
4359 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4360 } else {
4361 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4362 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4363 }
4364
4365 return wptr;
4366}
4367
4368static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4369{
4370 struct amdgpu_device *adev = ring->adev;
4371
4372 if (ring->use_doorbell) {
4373 /* XXX check if swapping is necessary on BE */
4374 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4375 WDOORBELL64(ring->doorbell_index, ring->wptr);
4376 } else {
4377 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4378 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4379 }
4380}
4381
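/*
 * Flush HDP by kicking the ref/mask bit that corresponds to this ring
 * (CP0 for gfx, CP2/CP6 plus pipe for the compute MEs) and waiting on
 * the NBIO hdp_flush done register.
 */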
4382static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4383{
946a4d5b 4384 struct amdgpu_device *adev = ring->adev;
b1023571 4385 u32 ref_and_mask, reg_mem_engine;
bf383fb6 4386 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4387
4388 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4389 switch (ring->me) {
4390 case 1:
4391 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4392 break;
4393 case 2:
4394 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4395 break;
4396 default:
4397 return;
4398 }
4399 reg_mem_engine = 0;
4400 } else {
4401 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4402 reg_mem_engine = 1; /* pfp */
4403 }
4404
4405 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4406 adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4407 adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4408 ref_and_mask, ref_and_mask, 0x20);
4409}
4410
b1023571 4411static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4412 struct amdgpu_job *job,
4413 struct amdgpu_ib *ib,
c4c905ec 4414 uint32_t flags)
b1023571 4415{
34955e03 4416 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
eaa05d52 4417 u32 header, control = 0;
b1023571 4418
eaa05d52
ML
4419 if (ib->flags & AMDGPU_IB_FLAG_CE)
4420 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4421 else
4422 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
b1023571 4423
c4f46f22 4424 control |= ib->length_dw | (vmid << 24);
b1023571 4425
635e7132 4426 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
eaa05d52 4427 control |= INDIRECT_BUFFER_PRE_ENB(1);
9ccd52eb 4428
635e7132
ML
4429 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4430 gfx_v9_0_ring_emit_de_meta(ring);
4431 }
4432
eaa05d52 4433 amdgpu_ring_write(ring, header);
72408a41 4434 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
eaa05d52 4435 amdgpu_ring_write(ring,
b1023571 4436#ifdef __BIG_ENDIAN
eaa05d52 4437 (2 << 0) |
b1023571 4438#endif
eaa05d52
ML
4439 lower_32_bits(ib->gpu_addr));
4440 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4441 amdgpu_ring_write(ring, control);
4442}
4443
b1023571 4444static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
34955e03
RZ
4445 struct amdgpu_job *job,
4446 struct amdgpu_ib *ib,
c4c905ec 4447 uint32_t flags)
b1023571 4448{
34955e03
RZ
4449 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4450 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
b1023571 4451
4452 /* Currently, there is a high possibility to get wave ID mismatch
4453 * between ME and GDS, leading to a hw deadlock, because ME generates
4454 * different wave IDs than the GDS expects. This situation happens
4455 * randomly when at least 5 compute pipes use GDS ordered append.
4456 * The wave IDs generated by ME are also wrong after suspend/resume.
4457 * Those are probably bugs somewhere else in the kernel driver.
4458 *
4459 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4460 * GDS to 0 for this ring (me/pipe).
4461 */
4462 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4463 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4464 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4465 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4466 }
4467
34955e03 4468 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
b1023571 4469 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
34955e03 4470 amdgpu_ring_write(ring,
b1023571 4471#ifdef __BIG_ENDIAN
34955e03 4472 (2 << 0) |
b1023571 4473#endif
34955e03
RZ
4474 lower_32_bits(ib->gpu_addr));
4475 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4476 amdgpu_ring_write(ring, control);
4477}
4478
4479static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4480 u64 seq, unsigned flags)
4481{
4482 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4483 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
d240cd9e 4484 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
b1023571
KW
4485
4486 /* RELEASE_MEM - flush caches, send int */
4487 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
d240cd9e
MO
4488 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4489 EOP_TC_NC_ACTION_EN) :
4490 (EOP_TCL1_ACTION_EN |
4491 EOP_TC_ACTION_EN |
4492 EOP_TC_WB_ACTION_EN |
4493 EOP_TC_MD_ACTION_EN)) |
b1023571
KW
4494 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4495 EVENT_INDEX(5)));
4496 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4497
4498 /*
4499 * the address should be Qword aligned for a 64bit write, Dword
4500 * aligned if only the low 32 bits of data are sent (data high is discarded)
4501 */
4502 if (write64bit)
4503 BUG_ON(addr & 0x7);
4504 else
4505 BUG_ON(addr & 0x3);
4506 amdgpu_ring_write(ring, lower_32_bits(addr));
4507 amdgpu_ring_write(ring, upper_32_bits(addr));
4508 amdgpu_ring_write(ring, lower_32_bits(seq));
4509 amdgpu_ring_write(ring, upper_32_bits(seq));
4510 amdgpu_ring_write(ring, 0);
4511}
4512
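/* Stall the ring (PFP on gfx, ME on compute) until this ring's fence memory
 * reports the last emitted sync sequence number, i.e. until all previously
 * submitted work on this ring has signalled.
 */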
4513static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4514{
4515 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4516 uint32_t seq = ring->fence_drv.sync_seq;
4517 uint64_t addr = ring->fence_drv.gpu_addr;
4518
4519 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4520 lower_32_bits(addr), upper_32_bits(addr),
4521 seq, 0xffffffff, 4);
4522}
4523
4524static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
c633c00b 4525 unsigned vmid, uint64_t pd_addr)
b1023571 4526{
c633c00b 4527 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
b1023571 4528
b1023571 4529 /* compute doesn't have PFP */
9096d6e5 4530 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
b1023571
KW
4531 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4532 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4533 amdgpu_ring_write(ring, 0x0);
b1023571
KW
4534 }
4535}
4536
4537static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4538{
4539 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4540}
4541
4542static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4543{
4544 u64 wptr;
4545
4546 /* XXX check if swapping is necessary on BE */
4547 if (ring->use_doorbell)
4548 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4549 else
4550 BUG();
4551 return wptr;
4552}
4553
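/* Program SPI_WCL_PIPE_PERCENT for the pipe backing this ring: the full field
 * value while the pipe is reserved for a high priority queue, the minimal
 * value (0x1) otherwise.
 */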
761c77c1
AR
4554static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4555 bool acquire)
4556{
4557 struct amdgpu_device *adev = ring->adev;
4558 int pipe_num, tmp, reg;
4559 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4560
4561 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4562
4563 /* first me only has 2 entries, GFX and HP3D */
4564 if (ring->me > 0)
4565 pipe_num -= 2;
4566
4567 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4568 tmp = RREG32(reg);
4569 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4570 WREG32(reg, tmp);
4571}
4572
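/* Track pipes hosting a high priority queue in pipe_reserve_bitmap and lower
 * the SPI pipe percentage of every pipe without a reservation; once no
 * reservations remain, all gfx and compute pipes are restored to full
 * throughput.
 */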
4573static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4574 struct amdgpu_ring *ring,
4575 bool acquire)
4576{
4577 int i, pipe;
4578 bool reserve;
4579 struct amdgpu_ring *iring;
4580
4581 mutex_lock(&adev->gfx.pipe_reserve_mutex);
7470bfcf 4582 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
761c77c1
AR
4583 if (acquire)
4584 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4585 else
4586 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4587
4588 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4589 /* Clear all reservations - everyone reacquires all resources */
4590 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4591 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4592 true);
4593
4594 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4595 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4596 true);
4597 } else {
4598 /* Lower all pipes without a current reservation */
4599 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4600 iring = &adev->gfx.gfx_ring[i];
7470bfcf
HZ
4601 pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4602 iring->me,
4603 iring->pipe,
4604 0);
761c77c1
AR
4605 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4606 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4607 }
4608
4609 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4610 iring = &adev->gfx.compute_ring[i];
7470bfcf
HZ
4611 pipe = amdgpu_gfx_mec_queue_to_bit(adev,
4612 iring->me,
4613 iring->pipe,
4614 0);
761c77c1
AR
4615 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4616 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4617 }
4618 }
4619
4620 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4621}
4622
4623static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4624 struct amdgpu_ring *ring,
4625 bool acquire)
4626{
4627 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4628 uint32_t queue_priority = acquire ? 0xf : 0x0;
4629
4630 mutex_lock(&adev->srbm_mutex);
4631 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4632
1bff7f6c
TH
4633 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4634 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
761c77c1
AR
4635
4636 soc15_grbm_select(adev, 0, 0, 0, 0);
4637 mutex_unlock(&adev->srbm_mutex);
4638}
4639
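/* Priority callback for compute rings: promotion to DRM_SCHED_PRIORITY_HIGH_HW
 * raises the HQD pipe/queue priority and reserves the pipe, demotion reverts
 * both.
 */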
4640static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4641 enum drm_sched_priority priority)
4642{
4643 struct amdgpu_device *adev = ring->adev;
4644 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4645
4646 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4647 return;
4648
4649 gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4650 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4651}
4652
b1023571
KW
4653static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4654{
4655 struct amdgpu_device *adev = ring->adev;
4656
4657 /* XXX check if swapping is necessary on BE */
4658 if (ring->use_doorbell) {
4659 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4660 WDOORBELL64(ring->doorbell_index, ring->wptr);
4661 } else {
4662 BUG(); /* only DOORBELL method supported on gfx9 now */
4663 }
4664}
4665
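/* KIQ fence: write the 32-bit sequence number to "addr" with WRITE_DATA and,
 * if requested, poke CPC_INT_STATUS to raise the interrupt (src_id 178).
 */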
aa6faa44
XY
4666static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4667 u64 seq, unsigned int flags)
4668{
cd29253f
SL
4669 struct amdgpu_device *adev = ring->adev;
4670
aa6faa44
XY
4671 /* we only allocate 32bit for each seq wb address */
4672 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4673
4674 /* write fence seq to the "addr" */
4675 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4676 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4677 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4678 amdgpu_ring_write(ring, lower_32_bits(addr));
4679 amdgpu_ring_write(ring, upper_32_bits(addr));
4680 amdgpu_ring_write(ring, lower_32_bits(seq));
4681
4682 if (flags & AMDGPU_FENCE_FLAG_INT) {
4683 /* set register to trigger INT */
4684 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4685 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4686 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4687 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4688 amdgpu_ring_write(ring, 0);
4689 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4690 }
4691}
4692
b1023571
KW
4693static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4694{
4695 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4696 amdgpu_ring_write(ring, 0);
4697}
4698
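/* Write a zero-initialized v9_ce_ib_state into the ce_payload slot of the
 * context save area (CSA); emitted from cntxcntl for SR-IOV VFs.
 */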
cca02cd3
XY
4699static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4700{
d81a2209 4701 struct v9_ce_ib_state ce_payload = {0};
cca02cd3
XY
4702 uint64_t csa_addr;
4703 int cnt;
4704
4705 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
6f05c4e9 4706 csa_addr = amdgpu_csa_vaddr(ring->adev);
cca02cd3
XY
4707
4708 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4709 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4710 WRITE_DATA_DST_SEL(8) |
4711 WR_CONFIRM) |
4712 WRITE_DATA_CACHE_POLICY(0));
4713 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4714 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4715 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4716}
4717
4718static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4719{
d81a2209 4720 struct v9_de_ib_state de_payload = {0};
cca02cd3
XY
4721 uint64_t csa_addr, gds_addr;
4722 int cnt;
4723
6f05c4e9 4724 csa_addr = amdgpu_csa_vaddr(ring->adev);
cca02cd3
XY
4725 gds_addr = csa_addr + 4096;
4726 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4727 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4728
4729 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4730 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4731 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4732 WRITE_DATA_DST_SEL(8) |
4733 WR_CONFIRM) |
4734 WRITE_DATA_CACHE_POLICY(0));
4735 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4736 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4737 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4738}
4739
2ea6ab27
ML
4740static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4741{
4742 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4743 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0: frame_begin, 1: frame_end */
4744}
4745
b1023571
KW
4746static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4747{
4748 uint32_t dw2 = 0;
4749
cca02cd3
XY
4750 if (amdgpu_sriov_vf(ring->adev))
4751 gfx_v9_0_ring_emit_ce_meta(ring);
4752
2ea6ab27
ML
4753 gfx_v9_0_ring_emit_tmz(ring, true);
4754
b1023571
KW
4755 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
4756 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4757 /* set load_global_config & load_global_uconfig */
4758 dw2 |= 0x8001;
4759 /* set load_cs_sh_regs */
4760 dw2 |= 0x01000000;
4761 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4762 dw2 |= 0x10002;
4763
4764 /* set load_ce_ram if preamble presented */
4765 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4766 dw2 |= 0x10000000;
4767 } else {
4768 /* still load_ce_ram if this is the first time a preamble is presented,
4769 * even though no context switch happens.
4770 */
4771 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4772 dw2 |= 0x10000000;
4773 }
4774
4775 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4776 amdgpu_ring_write(ring, dw2);
4777 amdgpu_ring_write(ring, 0);
4778}
4779
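/* COND_EXEC based preemption support: init_cond_exec emits a COND_EXEC packet
 * with a placeholder dword count and returns its ring offset; patch_cond_exec
 * later overwrites the placeholder with the real number of dwords to skip,
 * taking ring buffer wrap-around into account.
 */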
9a5e02b5
ML
4780static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4781{
4782 unsigned ret;
4783 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4784 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4785 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4786 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4787 ret = ring->wptr & ring->buf_mask;
4788 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4789 return ret;
4790}
4791
4792static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4793{
4794 unsigned cur;
4795 BUG_ON(offset > ring->buf_mask);
4796 BUG_ON(ring->ring[offset] != 0x55aa55aa);
4797
4798 cur = (ring->wptr & ring->buf_mask) - 1;
4799 if (likely(cur > offset))
4800 ring->ring[offset] = cur - offset;
4801 else
4802 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4803}
4804
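/* Register read via the ring (used by the KIQ): COPY_DATA copies the register
 * into the writeback slot at adev->virt.reg_val_offs for the driver to read.
 */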
aa6faa44
XY
4805static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4806{
4807 struct amdgpu_device *adev = ring->adev;
4808
4809 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4810 amdgpu_ring_write(ring, 0 | /* src: register */
4811 (5 << 8) | /* dst: memory */
4812 (1 << 20)); /* write confirm */
4813 amdgpu_ring_write(ring, reg);
4814 amdgpu_ring_write(ring, 0);
4815 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4816 adev->virt.reg_val_offs * 4));
4817 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4818 adev->virt.reg_val_offs * 4));
4819}
4820
4821static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
254e825b 4822 uint32_t val)
aa6faa44 4823{
254e825b
CK
4824 uint32_t cmd = 0;
4825
4826 switch (ring->funcs->type) {
4827 case AMDGPU_RING_TYPE_GFX:
4828 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4829 break;
4830 case AMDGPU_RING_TYPE_KIQ:
4831 cmd = (1 << 16); /* no inc addr */
4832 break;
4833 default:
4834 cmd = WR_CONFIRM;
4835 break;
4836 }
aa6faa44 4837 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
254e825b 4838 amdgpu_ring_write(ring, cmd);
aa6faa44
XY
4839 amdgpu_ring_write(ring, reg);
4840 amdgpu_ring_write(ring, 0);
4841 amdgpu_ring_write(ring, val);
4842}
4843
230fcc34
CK
4844static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4845 uint32_t val, uint32_t mask)
4846{
4847 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4848}
4849
10ed3c31
AD
4850static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4851 uint32_t reg0, uint32_t reg1,
4852 uint32_t ref, uint32_t mask)
4853{
4854 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
39b62541
ED
4855 struct amdgpu_device *adev = ring->adev;
4856 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4857 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
10ed3c31 4858
39b62541 4859 if (fw_version_ok)
58cd8fbc
CK
4860 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4861 ref, mask, 0x20);
4862 else
4863 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4864 ref, mask);
10ed3c31
AD
4865}
4866
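/* Soft ring recovery: issue an SQ_CMD targeting the given VMID, intended to
 * kill the waves of the hung job without a full GPU reset.
 */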
80dbea47
CK
4867static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4868{
4869 struct amdgpu_device *adev = ring->adev;
4870 uint32_t value = 0;
4871
4872 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4873 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4874 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4875 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
17b6d2d5 4876 WREG32_SOC15(GC, 0, mmSQ_CMD, value);
80dbea47
CK
4877}
4878
b1023571
KW
4879static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4880 enum amdgpu_interrupt_state state)
4881{
b1023571
KW
4882 switch (state) {
4883 case AMDGPU_IRQ_STATE_DISABLE:
b1023571 4884 case AMDGPU_IRQ_STATE_ENABLE:
9da2c652
TSD
4885 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4886 TIME_STAMP_INT_ENABLE,
4887 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
b1023571
KW
4888 break;
4889 default:
4890 break;
4891 }
4892}
4893
4894static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4895 int me, int pipe,
4896 enum amdgpu_interrupt_state state)
4897{
4898 u32 mec_int_cntl, mec_int_cntl_reg;
4899
4900 /*
d0c55cdf
AD
4901 * amdgpu controls only the first MEC. That's why this function only
4902 * handles the setting of interrupts for this specific MEC. All other
b1023571
KW
4903 * pipes' interrupts are set by amdkfd.
4904 */
4905
4906 if (me == 1) {
4907 switch (pipe) {
4908 case 0:
4909 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4910 break;
d0c55cdf
AD
4911 case 1:
4912 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4913 break;
4914 case 2:
4915 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4916 break;
4917 case 3:
4918 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4919 break;
b1023571
KW
4920 default:
4921 DRM_DEBUG("invalid pipe %d\n", pipe);
4922 return;
4923 }
4924 } else {
4925 DRM_DEBUG("invalid me %d\n", me);
4926 return;
4927 }
4928
4929 switch (state) {
4930 case AMDGPU_IRQ_STATE_DISABLE:
4931 mec_int_cntl = RREG32(mec_int_cntl_reg);
4932 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4933 TIME_STAMP_INT_ENABLE, 0);
4934 WREG32(mec_int_cntl_reg, mec_int_cntl);
4935 break;
4936 case AMDGPU_IRQ_STATE_ENABLE:
4937 mec_int_cntl = RREG32(mec_int_cntl_reg);
4938 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4939 TIME_STAMP_INT_ENABLE, 1);
4940 WREG32(mec_int_cntl_reg, mec_int_cntl);
4941 break;
4942 default:
4943 break;
4944 }
4945}
4946
4947static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4948 struct amdgpu_irq_src *source,
4949 unsigned type,
4950 enum amdgpu_interrupt_state state)
4951{
b1023571
KW
4952 switch (state) {
4953 case AMDGPU_IRQ_STATE_DISABLE:
b1023571 4954 case AMDGPU_IRQ_STATE_ENABLE:
8dd553e1
TSD
4955 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4956 PRIV_REG_INT_ENABLE,
4957 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
b1023571
KW
4958 break;
4959 default:
4960 break;
4961 }
4962
4963 return 0;
4964}
4965
4966static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4967 struct amdgpu_irq_src *source,
4968 unsigned type,
4969 enum amdgpu_interrupt_state state)
4970{
b1023571
KW
4971 switch (state) {
4972 case AMDGPU_IRQ_STATE_DISABLE:
b1023571 4973 case AMDGPU_IRQ_STATE_ENABLE:
98709ca6
TSD
4974 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4975 PRIV_INSTR_INT_ENABLE,
4976 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
break;
b1023571
KW
4977 default:
4978 break;
4979 }
4980
4981 return 0;
4982}
4983
760a1d55
FX
4984#define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
4985 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4986 CP_ECC_ERROR_INT_ENABLE, 1)
4987
4988#define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
4989 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4990 CP_ECC_ERROR_INT_ENABLE, 0)
4991
4992static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4993 struct amdgpu_irq_src *source,
4994 unsigned type,
4995 enum amdgpu_interrupt_state state)
4996{
4997 switch (state) {
4998 case AMDGPU_IRQ_STATE_DISABLE:
4999 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5000 CP_ECC_ERROR_INT_ENABLE, 0);
5001 DISABLE_ECC_ON_ME_PIPE(1, 0);
5002 DISABLE_ECC_ON_ME_PIPE(1, 1);
5003 DISABLE_ECC_ON_ME_PIPE(1, 2);
5004 DISABLE_ECC_ON_ME_PIPE(1, 3);
5005 break;
5006
5007 case AMDGPU_IRQ_STATE_ENABLE:
5008 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5009 CP_ECC_ERROR_INT_ENABLE, 1);
5010 ENABLE_ECC_ON_ME_PIPE(1, 0);
5011 ENABLE_ECC_ON_ME_PIPE(1, 1);
5012 ENABLE_ECC_ON_ME_PIPE(1, 2);
5013 ENABLE_ECC_ON_ME_PIPE(1, 3);
5014 break;
5015 default:
5016 break;
5017 }
5018
5019 return 0;
5020}
5021
5022
b1023571
KW
5023static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5024 struct amdgpu_irq_src *src,
5025 unsigned type,
5026 enum amdgpu_interrupt_state state)
5027{
5028 switch (type) {
53b2fe41 5029 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
b1023571
KW
5030 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5031 break;
5032 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5033 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5034 break;
5035 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5036 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5037 break;
5038 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5039 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5040 break;
5041 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5042 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5043 break;
5044 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5045 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5046 break;
5047 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5048 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5049 break;
5050 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5051 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5052 break;
5053 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5054 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5055 break;
5056 default:
5057 break;
5058 }
5059 return 0;
5060}
5061
5062static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5063 struct amdgpu_irq_src *source,
5064 struct amdgpu_iv_entry *entry)
5065{
5066 int i;
5067 u8 me_id, pipe_id, queue_id;
5068 struct amdgpu_ring *ring;
5069
5070 DRM_DEBUG("IH: CP EOP\n");
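/* ring_id layout in the IV entry: bits [1:0] pipe, [3:2] me, [6:4] queue */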
5071 me_id = (entry->ring_id & 0x0c) >> 2;
5072 pipe_id = (entry->ring_id & 0x03) >> 0;
5073 queue_id = (entry->ring_id & 0x70) >> 4;
5074
5075 switch (me_id) {
5076 case 0:
5077 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5078 break;
5079 case 1:
5080 case 2:
5081 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5082 ring = &adev->gfx.compute_ring[i];
5083 /* Per-queue interrupt is supported for MEC starting from VI.
5084 * The interrupt can only be enabled/disabled per pipe, not per queue.
5085 */
5086 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5087 amdgpu_fence_process(ring);
5088 }
5089 break;
5090 }
5091 return 0;
5092}
5093
898c2cb5
CK
5094static void gfx_v9_0_fault(struct amdgpu_device *adev,
5095 struct amdgpu_iv_entry *entry)
5096{
5097 u8 me_id, pipe_id, queue_id;
5098 struct amdgpu_ring *ring;
5099 int i;
5100
5101 me_id = (entry->ring_id & 0x0c) >> 2;
5102 pipe_id = (entry->ring_id & 0x03) >> 0;
5103 queue_id = (entry->ring_id & 0x70) >> 4;
5104
5105 switch (me_id) {
5106 case 0:
5107 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5108 break;
5109 case 1:
5110 case 2:
5111 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5112 ring = &adev->gfx.compute_ring[i];
5113 if (ring->me == me_id && ring->pipe == pipe_id &&
5114 ring->queue == queue_id)
5115 drm_sched_fault(&ring->sched);
5116 }
5117 break;
5118 }
5119}
5120
b1023571
KW
5121static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5122 struct amdgpu_irq_src *source,
5123 struct amdgpu_iv_entry *entry)
5124{
5125 DRM_ERROR("Illegal register access in command stream\n");
898c2cb5 5126 gfx_v9_0_fault(adev, entry);
b1023571
KW
5127 return 0;
5128}
5129
5130static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5131 struct amdgpu_irq_src *source,
5132 struct amdgpu_iv_entry *entry)
5133{
5134 DRM_ERROR("Illegal instruction in command stream\n");
898c2cb5 5135 gfx_v9_0_fault(adev, entry);
b1023571
KW
5136 return 0;
5137}
5138
760a1d55
FX
5139static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5140 struct amdgpu_iv_entry *entry)
5141{
5142 /* TODO: a UE (uncorrectable error) will trigger an interrupt. */
9b54d201 5143 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
760a1d55
FX
5144 amdgpu_ras_reset_gpu(adev, 0);
5145 return AMDGPU_RAS_UE;
5146}
5147
5148static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5149 struct amdgpu_irq_src *source,
5150 struct amdgpu_iv_entry *entry)
5151{
14cfde84 5152 struct ras_common_if *ras_if = adev->gfx.ras_if;
760a1d55 5153 struct ras_dispatch_if ih_data = {
760a1d55
FX
5154 .entry = entry,
5155 };
14cfde84 5156
5157 if (!ras_if)
5158 return 0;
5159
5160 ih_data.head = *ras_if;
5161
760a1d55
FX
5162 DRM_ERROR("CP ECC ERROR IRQ\n");
5163 amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5164 return 0;
5165}
5166
fa04b6ba 5167static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
b1023571
KW
5168 .name = "gfx_v9_0",
5169 .early_init = gfx_v9_0_early_init,
5170 .late_init = gfx_v9_0_late_init,
5171 .sw_init = gfx_v9_0_sw_init,
5172 .sw_fini = gfx_v9_0_sw_fini,
5173 .hw_init = gfx_v9_0_hw_init,
5174 .hw_fini = gfx_v9_0_hw_fini,
5175 .suspend = gfx_v9_0_suspend,
5176 .resume = gfx_v9_0_resume,
5177 .is_idle = gfx_v9_0_is_idle,
5178 .wait_for_idle = gfx_v9_0_wait_for_idle,
5179 .soft_reset = gfx_v9_0_soft_reset,
5180 .set_clockgating_state = gfx_v9_0_set_clockgating_state,
5181 .set_powergating_state = gfx_v9_0_set_powergating_state,
12ad27fa 5182 .get_clockgating_state = gfx_v9_0_get_clockgating_state,
b1023571
KW
5183};
5184
5185static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5186 .type = AMDGPU_RING_TYPE_GFX,
5187 .align_mask = 0xff,
5188 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5189 .support_64bit_ptrs = true,
0eeb68b3 5190 .vmhub = AMDGPU_GFXHUB,
b1023571
KW
5191 .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5192 .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5193 .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
e9d672b2
ML
5194 .emit_frame_size = /* 242 dwords maximum in total if 16 IBs */
5195 5 + /* COND_EXEC */
5196 7 + /* PIPELINE_SYNC */
f732b6b3
CK
5197 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5198 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5199 2 + /* VM_FLUSH */
e9d672b2
ML
5200 8 + /* FENCE for VM_FLUSH */
5201 20 + /* GDS switch */
5202 4 + /* double SWITCH_BUFFER,
5203 the first COND_EXEC jumps to the place just
5204 prior to this double SWITCH_BUFFER */
5205 5 + /* COND_EXEC */
5206 7 + /* HDP_flush */
5207 4 + /* VGT_flush */
5208 14 + /* CE_META */
5209 31 + /* DE_META */
5210 3 + /* CNTX_CTRL */
5211 5 + /* HDP_INVL */
5212 8 + 8 + /* FENCE x2 */
5213 2, /* SWITCH_BUFFER */
b1023571
KW
5214 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
5215 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5216 .emit_fence = gfx_v9_0_ring_emit_fence,
5217 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5218 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5219 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5220 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
b1023571
KW
5221 .test_ring = gfx_v9_0_ring_test_ring,
5222 .test_ib = gfx_v9_0_ring_test_ib,
5223 .insert_nop = amdgpu_ring_insert_nop,
5224 .pad_ib = amdgpu_ring_generic_pad_ib,
5225 .emit_switch_buffer = gfx_v9_ring_emit_sb,
5226 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
9a5e02b5
ML
5227 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5228 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
3b4d68e9 5229 .emit_tmz = gfx_v9_0_ring_emit_tmz,
254e825b 5230 .emit_wreg = gfx_v9_0_ring_emit_wreg,
230fcc34 5231 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
10ed3c31 5232 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
80dbea47 5233 .soft_recovery = gfx_v9_0_ring_soft_recovery,
b1023571
KW
5234};
5235
5236static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5237 .type = AMDGPU_RING_TYPE_COMPUTE,
5238 .align_mask = 0xff,
5239 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5240 .support_64bit_ptrs = true,
0eeb68b3 5241 .vmhub = AMDGPU_GFXHUB,
b1023571
KW
5242 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5243 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5244 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5245 .emit_frame_size =
5246 20 + /* gfx_v9_0_ring_emit_gds_switch */
5247 7 + /* gfx_v9_0_ring_emit_hdp_flush */
2ee150cd 5248 5 + /* hdp invalidate */
b1023571 5249 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
f732b6b3
CK
5250 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5251 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5252 2 + /* gfx_v9_0_ring_emit_vm_flush */
b1023571 5253 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
41cca166 5254 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
b1023571
KW
5255 .emit_ib = gfx_v9_0_ring_emit_ib_compute,
5256 .emit_fence = gfx_v9_0_ring_emit_fence,
5257 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5258 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5259 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5260 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
b1023571
KW
5261 .test_ring = gfx_v9_0_ring_test_ring,
5262 .test_ib = gfx_v9_0_ring_test_ib,
5263 .insert_nop = amdgpu_ring_insert_nop,
5264 .pad_ib = amdgpu_ring_generic_pad_ib,
761c77c1 5265 .set_priority = gfx_v9_0_ring_set_priority_compute,
254e825b 5266 .emit_wreg = gfx_v9_0_ring_emit_wreg,
230fcc34 5267 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
10ed3c31 5268 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
b1023571
KW
5269};
5270
aa6faa44
XY
5271static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5272 .type = AMDGPU_RING_TYPE_KIQ,
5273 .align_mask = 0xff,
5274 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5275 .support_64bit_ptrs = true,
0eeb68b3 5276 .vmhub = AMDGPU_GFXHUB,
aa6faa44
XY
5277 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
5278 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
5279 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
5280 .emit_frame_size =
5281 20 + /* gfx_v9_0_ring_emit_gds_switch */
5282 7 + /* gfx_v9_0_ring_emit_hdp_flush */
2ee150cd 5283 5 + /* hdp invalidate */
aa6faa44 5284 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
f732b6b3
CK
5285 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5286 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5287 2 + /* gfx_v9_0_ring_emit_vm_flush */
aa6faa44 5288 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
41cca166 5289 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
aa6faa44 5290 .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
aa6faa44 5291 .test_ring = gfx_v9_0_ring_test_ring,
aa6faa44
XY
5292 .insert_nop = amdgpu_ring_insert_nop,
5293 .pad_ib = amdgpu_ring_generic_pad_ib,
5294 .emit_rreg = gfx_v9_0_ring_emit_rreg,
5295 .emit_wreg = gfx_v9_0_ring_emit_wreg,
230fcc34 5296 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
10ed3c31 5297 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
aa6faa44 5298};
b1023571
KW
5299
5300static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5301{
5302 int i;
5303
aa6faa44
XY
5304 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5305
b1023571
KW
5306 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5307 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5308
5309 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5310 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5311}
5312
5313static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5314 .set = gfx_v9_0_set_eop_interrupt_state,
5315 .process = gfx_v9_0_eop_irq,
5316};
5317
5318static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5319 .set = gfx_v9_0_set_priv_reg_fault_state,
5320 .process = gfx_v9_0_priv_reg_irq,
5321};
5322
5323static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5324 .set = gfx_v9_0_set_priv_inst_fault_state,
5325 .process = gfx_v9_0_priv_inst_irq,
5326};
5327
760a1d55
FX
5328static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5329 .set = gfx_v9_0_set_cp_ecc_error_state,
5330 .process = gfx_v9_0_cp_ecc_error_irq,
5331};
5332
5333
b1023571
KW
5334static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5335{
5336 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5337 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5338
5339 adev->gfx.priv_reg_irq.num_types = 1;
5340 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5341
5342 adev->gfx.priv_inst_irq.num_types = 1;
5343 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
760a1d55
FX
5344
5345 adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
5346 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
b1023571
KW
5347}
5348
5349static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5350{
5351 switch (adev->asic_type) {
5352 case CHIP_VEGA10:
8b399477 5353 case CHIP_VEGA12:
61324ddc 5354 case CHIP_VEGA20:
a4dc61f5 5355 case CHIP_RAVEN:
b1023571
KW
5356 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5357 break;
5358 default:
5359 break;
5360 }
5361}
5362
5363static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5364{
5365 /* init asic gds info */
8bda1013
ED
5366 switch (adev->asic_type) {
5367 case CHIP_VEGA10:
5368 case CHIP_VEGA12:
5369 case CHIP_VEGA20:
dca29491 5370 adev->gds.gds_size = 0x10000;
8bda1013
ED
5371 break;
5372 case CHIP_RAVEN:
dca29491 5373 adev->gds.gds_size = 0x1000;
8bda1013
ED
5374 break;
5375 default:
dca29491 5376 adev->gds.gds_size = 0x10000;
8bda1013
ED
5377 break;
5378 }
5379
41cca166
MO
5380 switch (adev->asic_type) {
5381 case CHIP_VEGA10:
5382 case CHIP_VEGA20:
5383 adev->gds.gds_compute_max_wave_id = 0x7ff;
5384 break;
5385 case CHIP_VEGA12:
5386 adev->gds.gds_compute_max_wave_id = 0x27f;
5387 break;
5388 case CHIP_RAVEN:
5389 if (adev->rev_id >= 0x8)
5390 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5391 else
5392 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5393 break;
5394 default:
5395 /* this really depends on the chip */
5396 adev->gds.gds_compute_max_wave_id = 0x7ff;
5397 break;
5398 }
5399
dca29491
CK
5400 adev->gds.gws_size = 64;
5401 adev->gds.oa_size = 16;
b1023571
KW
5402}
5403
c94d38f0
NH
5404static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5405 u32 bitmap)
5406{
5407 u32 data;
5408
5409 if (!bitmap)
5410 return;
5411
5412 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5413 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5414
5415 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5416}
5417
b1023571
KW
5418static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5419{
5420 u32 data, mask;
5421
5e78835a
TSD
5422 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5423 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
b1023571
KW
5424
5425 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5426 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5427
378506a7 5428 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
b1023571
KW
5429
5430 return (~data) & mask;
5431}
5432
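/* Walk every SE/SH pair, apply the user CU disable masks and collect the
 * active CU bitmaps, the always-on CU mask and the total active CU count.
 */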
5433static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5434 struct amdgpu_cu_info *cu_info)
5435{
5436 int i, j, k, counter, active_cu_number = 0;
5437 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
c94d38f0 5438 unsigned disable_masks[4 * 2];
b1023571
KW
5439
5440 if (!adev || !cu_info)
5441 return -EINVAL;
5442
c94d38f0
NH
5443 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5444
b1023571
KW
5445 mutex_lock(&adev->grbm_idx_mutex);
5446 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5447 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5448 mask = 1;
5449 ao_bitmap = 0;
5450 counter = 0;
5451 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
c94d38f0
NH
5452 if (i < 4 && j < 2)
5453 gfx_v9_0_set_user_cu_inactive_bitmap(
5454 adev, disable_masks[i * 2 + j]);
b1023571
KW
5455 bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5456 cu_info->bitmap[i][j] = bitmap;
5457
fe723cd3 5458 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
b1023571 5459 if (bitmap & mask) {
fe723cd3 5460 if (counter < adev->gfx.config.max_cu_per_sh)
b1023571
KW
5461 ao_bitmap |= mask;
5462 counter++;
5463 }
5464 mask <<= 1;
5465 }
5466 active_cu_number += counter;
dbfe85ea
FC
5467 if (i < 2 && j < 2)
5468 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5469 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
b1023571
KW
5470 }
5471 }
5472 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5473 mutex_unlock(&adev->grbm_idx_mutex);
5474
5475 cu_info->number = active_cu_number;
5476 cu_info->ao_cu_mask = ao_cu_mask;
d5a114a6 5477 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
b1023571
KW
5478
5479 return 0;
5480}
5481
b1023571
KW
5482const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5483{
5484 .type = AMD_IP_BLOCK_TYPE_GFX,
5485 .major = 9,
5486 .minor = 0,
5487 .rev = 0,
5488 .funcs = &gfx_v9_0_ip_funcs,
5489};