/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/pci-p2pdma.h>
#include <linux/apple-gmux.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/device.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"
#include "amdgpu_virt.h"
#include "amdgpu_dev_coredump.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>

#if IS_ENABLED(CONFIG_X86)
#include <asm/intel-family.h>
#endif

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)

static const struct drm_driver amdgpu_kms_driver;

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};

static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return sysfs_emit(buf, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, 0444,
		   amdgpu_device_get_pcie_replay_count, NULL);

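/*
 * Editor's note (illustrative, not part of the original source): the
 * attribute above is read-only and is normally consumed from user space,
 * e.g. with something like
 *
 *   cat /sys/bus/pci/devices/<domain:bus:dev.fn>/pcie_replay_count
 *
 * The exact sysfs path depends on the GPU's PCI address; the value is the
 * sum of NAKs generated and NAKs received, as described in the DOC block
 * above.
 */
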
static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
					  struct bin_attribute *attr, char *buf,
					  loff_t ppos, size_t count)
{
	struct device *dev = kobj_to_dev(kobj);
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	ssize_t bytes_read;

	switch (ppos) {
	case AMDGPU_SYS_REG_STATE_XGMI:
		bytes_read = amdgpu_asic_get_reg_state(
			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
		break;
	case AMDGPU_SYS_REG_STATE_WAFL:
		bytes_read = amdgpu_asic_get_reg_state(
			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
		break;
	case AMDGPU_SYS_REG_STATE_PCIE:
		bytes_read = amdgpu_asic_get_reg_state(
			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
		break;
	case AMDGPU_SYS_REG_STATE_USR:
		bytes_read = amdgpu_asic_get_reg_state(
			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
		break;
	case AMDGPU_SYS_REG_STATE_USR_1:
		bytes_read = amdgpu_asic_get_reg_state(
			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
		break;
	default:
		return -EINVAL;
	}

	return bytes_read;
}

BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
	 AMDGPU_SYS_REG_STATE_END);

int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
{
	int ret;

	if (!amdgpu_asic_get_reg_state_supported(adev))
		return 0;

	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);

	return ret;
}

void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
{
	if (!amdgpu_asic_get_reg_state_supported(adev))
		return;
	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
}

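/*
 * Editor's note (assumption about typical usage, not from the original
 * source): reg_state is a binary sysfs attribute, so the read offset
 * (@ppos) selects which register-state block is returned (XGMI, WAFL,
 * PCIE, USR or USR_1); a user-space reader would seek to the desired
 * AMDGPU_SYS_REG_STATE_* offset before calling read().
 */
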
/**
 * DOC: board_info
 *
 * The amdgpu driver provides a sysfs API for giving board-related information.
 * It provides the form factor information in the format
 *
 *   type : form factor
 *
 * Possible form factor values:
 *
 * - "cem"     - PCIE CEM card
 * - "oam"     - Open Compute Accelerator Module
 * - "unknown" - Not known
 *
 */

static ssize_t amdgpu_device_get_board_info(struct device *dev,
					    struct device_attribute *attr,
					    char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
	const char *pkg;

	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
		pkg_type = adev->smuio.funcs->get_pkg_type(adev);

	switch (pkg_type) {
	case AMDGPU_PKG_TYPE_CEM:
		pkg = "cem";
		break;
	case AMDGPU_PKG_TYPE_OAM:
		pkg = "oam";
		break;
	default:
		pkg = "unknown";
		break;
	}

	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
}

static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);

static struct attribute *amdgpu_board_attrs[] = {
	&dev_attr_board_info.attr,
	NULL,
};

static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
					     struct attribute *attr, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	if (adev->flags & AMD_IS_APU)
		return 0;

	return attr->mode;
}

static const struct attribute_group amdgpu_board_attrs_group = {
	.attrs = amdgpu_board_attrs,
	.is_visible = amdgpu_board_attrs_is_visible
};

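/*
 * Editor's note: reading board_info returns a single line such as
 * "type : oam" (see the DOC: board_info block above). The attribute
 * group's is_visible callback hides the file on APUs, where the form
 * factor is not meaningful.
 */
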
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);


/**
 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ATPX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
		return true;
	return false;
}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if (adev->has_pr3 ||
	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Return:
 * 1 if the device supports BACO;
 * 3 if the device supports MACO (only valid if BACO is supported);
 * otherwise return 0.
 */
int amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	return amdgpu_asic_supports_baco(adev);
}

void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
{
	struct drm_device *dev;
	int bamaco_support;

	dev = adev_to_drm(adev);

	adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
	bamaco_support = amdgpu_device_supports_baco(dev);

	switch (amdgpu_runtime_pm) {
	case 2:
		if (bamaco_support & MACO_SUPPORT) {
			adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
			dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
		} else if (bamaco_support == BACO_SUPPORT) {
			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
			dev_info(adev->dev, "Requested mode BAMACO not available, falling back to BACO\n");
		}
		break;
	case 1:
		if (bamaco_support & BACO_SUPPORT) {
			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
			dev_info(adev->dev, "Forcing BACO for runtime pm\n");
		}
		break;
	case -1:
	case -2:
		if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */
			adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
			dev_info(adev->dev, "Using ATPX for runtime pm\n");
		} else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */
			adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
			dev_info(adev->dev, "Using BOCO for runtime pm\n");
		} else {
			if (!bamaco_support)
				goto no_runtime_pm;

			switch (adev->asic_type) {
			case CHIP_VEGA20:
			case CHIP_ARCTURUS:
				/* BACO is not supported on vega20 and arcturus */
				break;
			case CHIP_VEGA10:
				/* enable BACO as runpm mode if noretry=0 */
				if (!adev->gmc.noretry)
					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
				break;
			default:
				/* enable BACO as runpm mode on CI+ */
				adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
				break;
			}

			if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
				if (bamaco_support & MACO_SUPPORT) {
					adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
					dev_info(adev->dev, "Using BAMACO for runtime pm\n");
				} else {
					dev_info(adev->dev, "Using BACO for runtime pm\n");
				}
			}
		}
		break;
	case 0:
		dev_info(adev->dev, "runtime pm is manually disabled\n");
		break;
	default:
		break;
	}

no_runtime_pm:
	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
		dev_info(adev->dev, "Runtime PM not available\n");
}
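
/*
 * Editor's summary of the selection above (derived from the switch):
 *   amdgpu_runtime_pm == 2  -> force BAMACO, fall back to BACO if MACO is absent
 *   amdgpu_runtime_pm == 1  -> force BACO when supported
 *   amdgpu_runtime_pm == 0  -> runtime PM manually disabled
 *   amdgpu_runtime_pm == -1/-2 (auto) -> prefer PX (ATPX), then BOCO, then
 *   BACO/BAMACO, with Vega20/Arcturus excluded and Vega10 gated on noretry=0.
 */
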
/**
 * amdgpu_device_supports_smart_shift - Is the device a dGPU with
 * Smart Shift support
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with Smart Shift support,
 * otherwise returns false.
 */
bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
{
	return (amdgpu_device_supports_boco(dev) &&
		amdgpu_acpi_is_power_shift_control_supported());
}

/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the size of @buf must be >= @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
			     void *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0, tmp = 0;
	uint32_t *data = buf;
	uint64_t last;
	int idx;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *data++);
		else
			*data++ = RREG32_NO_KIQ(mmMM_DATA);
	}

	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	drm_dev_exit(idx);
}
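
/*
 * Editor's note on the access pattern above: MM_INDEX holds the low 31 bits
 * of the VRAM offset (bit 31 selects the MM aperture), while MM_INDEX_HI
 * holds the upper bits and is only rewritten when the loop crosses a 2 GB
 * boundary; each dword is then transferred through MM_DATA. For example,
 * accessing offset 0x1_0000_0004 programs MM_INDEX_HI = 0x2 and
 * MM_INDEX = 0x80000004 before touching MM_DATA.
 */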

/**
 * amdgpu_device_aper_access - access vram by vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the size of @buf must be >= @size
 * @write: true - write to vram, otherwise - read from vram
 *
 * The return value means how many bytes have been transferred.
 */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
				 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
	void __iomem *addr;
	size_t count = 0;
	uint64_t last;

	if (!adev->mman.aper_base_kaddr)
		return 0;

	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		addr = adev->mman.aper_base_kaddr + pos;
		count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			/* Make sure HDP write cache flush happens without any reordering
			 * after the system memory contents are sent over PCIe device
			 */
			mb();
			amdgpu_device_flush_hdp(adev, NULL);
		} else {
			amdgpu_device_invalidate_hdp(adev, NULL);
			/* Make sure HDP read cache is invalidated before issuing a read
			 * to the PCIe device
			 */
			mb();
			memcpy_fromio(buf, addr, count);
		}

	}

	return count;
#else
	return 0;
#endif
}

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the size of @buf must be >= @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       void *buf, size_t size, bool write)
{
	size_t count;

	/* try using the vram aperture to access vram first */
	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
	size -= count;
	if (size) {
		/* use MM to access the rest of vram */
		pos += count;
		buf += count;
		amdgpu_device_mm_access(adev, pos, buf, size, write);
	}
}
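
/*
 * Editor's illustrative usage (a sketch, not from the original source):
 * reading the first 16 bytes of VRAM into a local buffer could look like
 *
 *   u32 tmp[4];
 *   amdgpu_device_vram_access(adev, 0, tmp, sizeof(tmp), false);
 *
 * The helper prefers the CPU-visible aperture and transparently falls back
 * to the MM_INDEX/MM_DATA path for any part that is not visible.
 */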

/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
	if (adev->no_hw_access)
		return true;

#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore, if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
	if (in_task()) {
		if (down_read_trylock(&adev->reset_domain->sem))
			up_read(&adev->reset_domain->sem);
		else
			lockdep_assert_held(&adev->reset_domain->sem);
	}
#endif
	return false;
}

/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags)
{
	uint32_t ret;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			ret = amdgpu_kiq_rreg(adev, reg, 0);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: bytes offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}


/**
 * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 * @xcc_id: xcc accelerated compute core id
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
				uint32_t reg, uint32_t acc_flags,
				uint32_t xcc_id)
{
	uint32_t ret, rlcg_flag;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		if (amdgpu_sriov_vf(adev) &&
		    !amdgpu_sriov_runtime(adev) &&
		    adev->gfx.rlc.rlcg_reg_access_supported &&
		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
							 GC_HWIP, false,
							 &rlcg_flag)) {
			ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	return ret;
}

/*
 * MMIO register write with bytes helper functions
 * @offset: bytes offset from MMIO start
 * @value: the value want to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			amdgpu_kiq_wreg(adev, reg, v, 0);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		adev->pcie_wreg(adev, reg * 4, v);
	}

	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

/**
 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio/rlc register
 * @v: value to write
 * @xcc_id: xcc accelerated compute core id
 *
 * This function is invoked only for debugfs register access.
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
			     uint32_t reg, uint32_t v,
			     uint32_t xcc_id)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
	} else if ((reg * 4) >= adev->rmmio_size) {
		adev->pcie_wreg(adev, reg * 4, v);
	} else {
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	}
}

/**
 * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 * @xcc_id: xcc accelerated compute core id
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t v,
			    uint32_t acc_flags, uint32_t xcc_id)
{
	uint32_t rlcg_flag;

	if (amdgpu_device_skip_hw_access(adev))
		return;

	if ((reg * 4) < adev->rmmio_size) {
		if (amdgpu_sriov_vf(adev) &&
		    !amdgpu_sriov_runtime(adev) &&
		    adev->gfx.rlc.rlcg_reg_access_supported &&
		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
							 GC_HWIP, true,
							 &rlcg_flag)) {
			amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			amdgpu_kiq_wreg(adev, reg, v, xcc_id);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		adev->pcie_wreg(adev, reg * 4, v);
	}
}

/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
				u32 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;
	u32 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}
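
/*
 * Editor's note on the indirect helpers above and below: they all follow the
 * same PCIE index/data pair protocol - write the register address to the
 * index register, read it back to flush the posted write, then access the
 * data register - all under pcie_idx_lock so the index/data sequence cannot
 * be interleaved by another thread. The *_ext variants additionally program
 * an index-high register for register addresses wider than 32 bits and clear
 * it again afterwards.
 */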

u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
				    u64 reg_addr)
{
	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
	u32 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	if (unlikely(!adev->nbio.funcs)) {
		pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
		pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
	} else {
		pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
		pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	}

	if (reg_addr >> 32) {
		if (unlikely(!adev->nbio.funcs))
			pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
		else
			pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
	} else {
		pcie_index_hi = 0;
	}

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	r = readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
				  u32 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;
	u64 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	r |= ((u64)readl(pcie_data_offset) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
				      u64 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	unsigned long pcie_index_hi = 0;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;
	u64 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
			pcie_index_hi * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	r |= ((u64)readl(pcie_data_offset) << 32);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_wreg - write an indirect register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
				 u32 reg_addr, u32 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
				     u64 reg_addr, u32 reg_data)
{
	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
	else
		pcie_index_hi = 0;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64 bit indirect register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
				   u32 reg_addr, u64 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
				       u64 reg_addr, u64 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	unsigned long pcie_index_hi = 0;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_get_rev_id - query device rev_id
 *
 * @adev: amdgpu_device pointer
 *
 * Return device rev_id
 */
u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
{
	return adev->nbio.funcs->get_rev_id(adev);
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then calls atom asic init.
 */
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
	int ret;

	amdgpu_asic_pre_asic_init(adev);

	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
		amdgpu_psp_wait_for_bootloader(adev);
		ret = amdgpu_atomfirmware_asic_init(adev, true);
		return ret;
	} else {
		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
	}

	return 0;
}

/**
 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
				       AMDGPU_GEM_DOMAIN_VRAM |
				       AMDGPU_GEM_DOMAIN_GTT,
				       &adev->mem_scratch.robj,
				       &adev->mem_scratch.gpu_addr,
				       (void **)&adev->mem_scratch.ptr);
}

/**
 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with and/or masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
	return pci_reset_function(adev->pdev);
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long flags, offset;

	spin_lock_irqsave(&adev->wb.lock, flags);
	offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		spin_unlock_irqrestore(&adev->wb.lock, flags);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		spin_unlock_irqrestore(&adev->wb.lock, flags);
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	unsigned long flags;

	wb >>= 3;
	spin_lock_irqsave(&adev->wb.lock, flags);
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
	spin_unlock_irqrestore(&adev->wb.lock, flags);
}

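/*
 * Editor's illustrative usage (a sketch, not from the original source):
 * a ring or IP block that needs a writeback slot typically does
 *
 *   u32 wb;
 *
 *   if (!amdgpu_device_wb_get(adev, &wb)) {
 *           use adev->wb.wb[wb] on the CPU side and
 *           adev->wb.gpu_addr + wb * 4 on the GPU side, then
 *           amdgpu_device_wb_free(adev, wb);
 *   }
 *
 * The returned index is already a dword offset (the allocator shifts the
 * slot number left by 3), so wb * 4 is the byte offset of the slot within
 * the writeback buffer.
 */
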
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
	struct pci_bus *root;
	struct resource *res;
	unsigned int i;
	u16 cmd;
	int r;

	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
		return 0;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
		DRM_WARN("System can't access extended configuration space, please check!!\n");

	/* skip if the bios has already enabled large BAR */
	if (adev->gmc.real_vram_size &&
	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Limit the BAR size to what is available */
	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
			rbar_size);

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}
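
/*
 * Editor's note on the sequence above: memory decoding is disabled and both
 * the doorbell and FB BARs are released before pci_resize_resource() is
 * called, because the bridge windows usually have to move for the larger BAR
 * to fit; the doorbell mapping is then re-initialized and the function only
 * bails out with -ENODEV if BAR0 ends up unassigned afterwards.
 */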

static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
{
	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
		return false;

	return true;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if post is needed or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (!amdgpu_device_read_bios(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: in the whole-GPU pass-through virtualization case, after a VM
		 * reboot some old SMC firmware still needs the driver to do a vPost,
		 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
		 * this flaw, so force vPost for SMC versions below 22.15.
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;

			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			release_firmware(adev->pm.fw);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	/* Don't post if we need to reset whole hive on init */
	if (adev->gmc.xgmi.pending_reset)
		return false;

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

5d1eb4c4 1678/*
bb0f8429
ML
1679 * Check whether seamless boot is supported.
1680 *
7f4ce7b5
ML
1681 * So far we only support seamless boot on DCE 3.0 or later.
1682 * If users report that it works on older ASICS as well, we may
1683 * loosen this.
bb0f8429
ML
1684 */
1685bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1686{
5dc270d3
ML
1687 switch (amdgpu_seamless) {
1688 case -1:
1689 break;
1690 case 1:
1691 return true;
1692 case 0:
1693 return false;
1694 default:
1695 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1696 amdgpu_seamless);
1697 return false;
1698 }
1699
3657a1d5
ML
1700 if (!(adev->flags & AMD_IS_APU))
1701 return false;
1702
5dc270d3
ML
1703 if (adev->mman.keep_stolen_vga_memory)
1704 return false;
1705
ed342a2e 1706 return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
bb0f8429
ML
1707}
1708
5d1eb4c4 1709/*
2757a848
ML
1710 * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1711 * don't support dynamic speed switching. Until we have confirmation from Intel
1712 * that a specific host supports it, it's safer that we keep it disabled for all.
5d1eb4c4
ML
1713 *
1714 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1715 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1716 */
d9b3a066 1717static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
5d1eb4c4
ML
1718{
1719#if IS_ENABLED(CONFIG_X86)
1720 struct cpuinfo_x86 *c = &cpu_data(0);
1721
d9b3a066
ML
1722 /* eGPU change speeds based on USB4 fabric conditions */
1723 if (dev_is_removable(adev->dev))
1724 return true;
1725
5d1eb4c4
ML
1726 if (c->x86_vendor == X86_VENDOR_INTEL)
1727 return false;
1728#endif
1729 return true;
1730}
1731
0ab5d711
ML
1732/**
1733 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1734 *
1735 * @adev: amdgpu_device pointer
1736 *
1737 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1738 * be set for this device.
1739 *
1740 * Returns true if it should be used or false if not.
1741 */
1742bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1743{
1744 switch (amdgpu_aspm) {
1745 case -1:
1746 break;
1747 case 0:
1748 return false;
1749 case 1:
1750 return true;
1751 default:
1752 return false;
1753 }
1a6513de
ML
1754 if (adev->flags & AMD_IS_APU)
1755 return false;
2757a848
ML
1756 if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1757 return false;
0ab5d711
ML
1758 return pcie_aspm_enabled(adev->pdev);
1759}
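
/*
 * Illustrative sketch (not part of the driver): IP-specific ASPM programming
 * paths would typically gate their register writes on the helper above, so
 * that both the amdgpu_aspm module parameter and the PCIe bridge state are
 * honoured.  The hook name below is hypothetical.
 */
static void __maybe_unused amdgpu_example_program_aspm(struct amdgpu_device *adev)
{
	if (!amdgpu_device_should_use_aspm(adev))
		return;
	/* ... program the ASPM link control registers here ... */
}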
1760
d38ceaf9
AD
1761/* if we get transitioned to only one device, take VGA back */
1762/**
06ec9070 1763 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1764 *
bf44e8ce 1765 * @pdev: PCI device pointer
d38ceaf9
AD
1766 * @state: enable/disable vga decode
1767 *
1768 * Enable/disable vga decode (all asics).
1769 * Returns VGA resource flags.
1770 */
bf44e8ce
CH
1771static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1772 bool state)
d38ceaf9 1773{
bf44e8ce 1774 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1775
d38ceaf9
AD
1776 amdgpu_asic_set_vga_state(adev, state);
1777 if (state)
1778 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1779 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1780 else
1781 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1782}
1783
e3ecdffa
AD
1784/**
1785 * amdgpu_device_check_block_size - validate the vm block size
1786 *
1787 * @adev: amdgpu_device pointer
1788 *
1789 * Validates the vm block size specified via module parameter.
1790	 * The vm block size defines the number of bits in the page table versus the
1791	 * page directory; a page is 4KB so we have a 12-bit offset, a minimum of
1792	 * 9 bits in the page table, and the remaining bits in the page directory.
1793 */
06ec9070 1794static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1795{
1796	 /* defines the number of bits in the page table versus the page directory,
1797	  * a page is 4KB so we have a 12-bit offset, a minimum of 9 bits in the
b8920e1e
SS
1798	  * page table, and the remaining bits in the page directory
1799 */
bab4fee7
JZ
1800 if (amdgpu_vm_block_size == -1)
1801 return;
a1adf8be 1802
bab4fee7 1803 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1804 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1805 amdgpu_vm_block_size);
97489129 1806 amdgpu_vm_block_size = -1;
a1adf8be 1807 }
a1adf8be
CZ
1808}
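
/*
 * Illustrative sketch (not part of the driver): with 4KB pages (12-bit offset)
 * and the minimum block size of 9, a single page table covers
 * 2^9 * 4KB = 2MB of virtual address space per page directory entry, and each
 * additional bit of vm_block_size doubles that coverage.
 */
static u64 __maybe_unused amdgpu_example_pt_coverage(unsigned int vm_block_size)
{
	/* bytes of VA space mapped by one page table */
	return 1ULL << (vm_block_size + 12);
}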
1809
e3ecdffa
AD
1810/**
1811 * amdgpu_device_check_vm_size - validate the vm size
1812 *
1813 * @adev: amdgpu_device pointer
1814 *
1815 * Validates the vm size in GB specified via module parameter.
1816 * The VM size is the size of the GPU virtual memory space in GB.
1817 */
06ec9070 1818static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1819{
64dab074
AD
1820 /* no need to check the default value */
1821 if (amdgpu_vm_size == -1)
1822 return;
1823
83ca145d
ZJ
1824 if (amdgpu_vm_size < 1) {
1825 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1826 amdgpu_vm_size);
f3368128 1827 amdgpu_vm_size = -1;
83ca145d 1828 }
83ca145d
ZJ
1829}
1830
7951e376
RZ
1831static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1832{
1833 struct sysinfo si;
a9d4fe2f 1834 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1835 uint64_t total_memory;
1836 uint64_t dram_size_seven_GB = 0x1B8000000;
1837 uint64_t dram_size_three_GB = 0xB8000000;
1838
1839 if (amdgpu_smu_memory_pool_size == 0)
1840 return;
1841
1842 if (!is_os_64) {
1843 DRM_WARN("Not 64-bit OS, feature not supported\n");
1844 goto def_value;
1845 }
1846 si_meminfo(&si);
1847 total_memory = (uint64_t)si.totalram * si.mem_unit;
1848
1849 if ((amdgpu_smu_memory_pool_size == 1) ||
1850 (amdgpu_smu_memory_pool_size == 2)) {
1851 if (total_memory < dram_size_three_GB)
1852 goto def_value1;
1853 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1854 (amdgpu_smu_memory_pool_size == 8)) {
1855 if (total_memory < dram_size_seven_GB)
1856 goto def_value1;
1857 } else {
1858 DRM_WARN("Smu memory pool size not supported\n");
1859 goto def_value;
1860 }
1861 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1862
1863 return;
1864
1865def_value1:
1866	 DRM_WARN("Not enough system memory\n");
1867def_value:
1868 adev->pm.smu_prv_buffer_size = 0;
1869}
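
/*
 * Illustrative sketch (not part of the driver): amdgpu_smu_memory_pool_size is
 * expressed in units of 256MB (the << 28 above), so the accepted values map as
 * 1 -> 256MB, 2 -> 512MB, 4 -> 1GB and 8 -> 2GB, with the smaller pools
 * requiring roughly 3GB and the larger ones roughly 7GB of system RAM.
 */
static uint64_t __maybe_unused amdgpu_example_smu_pool_bytes(uint32_t pool_size)
{
	return (uint64_t)pool_size << 28; /* 256MB granularity */
}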
1870
9f6a7857
HR
1871static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1872{
1873 if (!(adev->flags & AMD_IS_APU) ||
1874 adev->asic_type < CHIP_RAVEN)
1875 return 0;
1876
1877 switch (adev->asic_type) {
1878 case CHIP_RAVEN:
1879 if (adev->pdev->device == 0x15dd)
1880 adev->apu_flags |= AMD_APU_IS_RAVEN;
1881 if (adev->pdev->device == 0x15d8)
1882 adev->apu_flags |= AMD_APU_IS_PICASSO;
1883 break;
1884 case CHIP_RENOIR:
1885 if ((adev->pdev->device == 0x1636) ||
1886 (adev->pdev->device == 0x164c))
1887 adev->apu_flags |= AMD_APU_IS_RENOIR;
1888 else
1889 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1890 break;
1891 case CHIP_VANGOGH:
1892 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1893 break;
1894 case CHIP_YELLOW_CARP:
1895 break;
d0f56dc2 1896 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1897 if ((adev->pdev->device == 0x13FE) ||
1898 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1899 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1900 break;
9f6a7857 1901 default:
4eaf21b7 1902 break;
9f6a7857
HR
1903 }
1904
1905 return 0;
1906}
1907
d38ceaf9 1908/**
06ec9070 1909 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1910 *
1911 * @adev: amdgpu_device pointer
1912 *
1913 * Validates certain module parameters and updates
1914 * the associated values used by the driver (all asics).
1915 */
912dfc84 1916static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1917{
5b011235
CZ
1918 if (amdgpu_sched_jobs < 4) {
1919 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1920 amdgpu_sched_jobs);
1921 amdgpu_sched_jobs = 4;
47fc644f 1922 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1923 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1924 amdgpu_sched_jobs);
1925 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1926 }
d38ceaf9 1927
83e74db6 1928 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1929	 /* gart size must be greater than or equal to 32M */
1930 dev_warn(adev->dev, "gart size (%d) too small\n",
1931 amdgpu_gart_size);
83e74db6 1932 amdgpu_gart_size = -1;
d38ceaf9
AD
1933 }
1934
36d38372 1935 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1936	 /* gtt size must be greater than or equal to 32M */
36d38372
CK
1937 dev_warn(adev->dev, "gtt size (%d) too small\n",
1938 amdgpu_gtt_size);
1939 amdgpu_gtt_size = -1;
d38ceaf9
AD
1940 }
1941
d07f14be
RH
1942 /* valid range is between 4 and 9 inclusive */
1943 if (amdgpu_vm_fragment_size != -1 &&
1944 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1945 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1946 amdgpu_vm_fragment_size = -1;
1947 }
1948
5d5bd5e3
KW
1949 if (amdgpu_sched_hw_submission < 2) {
1950 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1951 amdgpu_sched_hw_submission);
1952 amdgpu_sched_hw_submission = 2;
1953 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1954 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1955 amdgpu_sched_hw_submission);
1956 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1957 }
1958
2656fd23
AG
1959 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1960 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1961 amdgpu_reset_method = -1;
1962 }
1963
7951e376
RZ
1964 amdgpu_device_check_smu_prv_buffer_size(adev);
1965
06ec9070 1966 amdgpu_device_check_vm_size(adev);
d38ceaf9 1967
06ec9070 1968 amdgpu_device_check_block_size(adev);
6a7f76e7 1969
19aede77 1970 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1971
e3c00faa 1972 return 0;
d38ceaf9
AD
1973}
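
/*
 * Illustrative sketch (not part of the driver): the validation above clamps
 * amdgpu_sched_jobs to at least 4 and rounds non-power-of-two values up, e.g.
 * 5 -> 8 and 12 -> 16, while 4, 8, 16, ... pass through unchanged because of
 * the is_power_of_2() check.
 */
static unsigned int __maybe_unused amdgpu_example_fix_sched_jobs(unsigned int jobs)
{
	if (jobs < 4)
		return 4;
	if (!is_power_of_2(jobs))
		return roundup_pow_of_two(jobs);
	return jobs;
}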
1974
1975/**
1976 * amdgpu_switcheroo_set_state - set switcheroo state
1977 *
1978 * @pdev: pci dev pointer
1694467b 1979 * @state: vga_switcheroo state
d38ceaf9 1980 *
12024b17 1981 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1982	 * the asic before it is powered down or after it is powered up using ACPI methods.
1983 */
8aba21b7
LT
1984static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1985 enum vga_switcheroo_state state)
d38ceaf9
AD
1986{
1987 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1988 int r;
d38ceaf9 1989
b98c6299 1990 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1991 return;
1992
1993 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1994 pr_info("switched on\n");
d38ceaf9
AD
1995 /* don't suspend or resume card normally */
1996 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1997
8f66090b
TZ
1998 pci_set_power_state(pdev, PCI_D0);
1999 amdgpu_device_load_pci_state(pdev);
2000 r = pci_enable_device(pdev);
de185019
AD
2001 if (r)
2002 DRM_WARN("pci_enable_device failed (%d)\n", r);
2003 amdgpu_device_resume(dev, true);
d38ceaf9 2004
d38ceaf9 2005 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 2006 } else {
dd4fa6c1 2007 pr_info("switched off\n");
d38ceaf9 2008 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
5095d541 2009 amdgpu_device_prepare(dev);
de185019 2010 amdgpu_device_suspend(dev, true);
8f66090b 2011 amdgpu_device_cache_pci_state(pdev);
de185019 2012 /* Shut down the device */
8f66090b
TZ
2013 pci_disable_device(pdev);
2014 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
2015 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
2016 }
2017}
2018
2019/**
2020 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
2021 *
2022 * @pdev: pci dev pointer
2023 *
2024	 * Callback for the switcheroo driver. Check if the switcheroo
2025 * state can be changed.
2026 * Returns true if the state can be changed, false if not.
2027 */
2028static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
2029{
2030 struct drm_device *dev = pci_get_drvdata(pdev);
2031
b8920e1e 2032 /*
d38ceaf9
AD
2033 * FIXME: open_count is protected by drm_global_mutex but that would lead to
2034 * locking inversion with the driver load path. And the access here is
2035 * completely racy anyway. So don't bother with locking for now.
2036 */
7e13ad89 2037 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
2038}
2039
2040static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
2041 .set_gpu_state = amdgpu_switcheroo_set_state,
2042 .reprobe = NULL,
2043 .can_switch = amdgpu_switcheroo_can_switch,
2044};
2045
e3ecdffa
AD
2046/**
2047 * amdgpu_device_ip_set_clockgating_state - set the CG state
2048 *
87e3f136 2049 * @dev: amdgpu_device pointer
e3ecdffa
AD
2050 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2051 * @state: clockgating state (gate or ungate)
2052 *
2053 * Sets the requested clockgating state for all instances of
2054 * the hardware IP specified.
2055 * Returns the error code from the last instance.
2056 */
43fa561f 2057int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
2058 enum amd_ip_block_type block_type,
2059 enum amd_clockgating_state state)
d38ceaf9 2060{
43fa561f 2061 struct amdgpu_device *adev = dev;
d38ceaf9
AD
2062 int i, r = 0;
2063
2064 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2065 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 2066 continue;
c722865a
RZ
2067 if (adev->ip_blocks[i].version->type != block_type)
2068 continue;
2069 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
2070 continue;
2071 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
2072 (void *)adev, state);
2073 if (r)
2074 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
2075 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
2076 }
2077 return r;
2078}
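
/*
 * Illustrative sketch (not part of the driver): a caller gates clockgating for
 * every instance of one IP type like this; the helper above walks the IP block
 * list and returns the error code of the last instance.  The wrapper name is
 * hypothetical.
 */
static int __maybe_unused amdgpu_example_gate_gfx_cg(struct amdgpu_device *adev)
{
	return amdgpu_device_ip_set_clockgating_state(adev,
						      AMD_IP_BLOCK_TYPE_GFX,
						      AMD_CG_STATE_GATE);
}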
2079
e3ecdffa
AD
2080/**
2081 * amdgpu_device_ip_set_powergating_state - set the PG state
2082 *
87e3f136 2083 * @dev: amdgpu_device pointer
e3ecdffa
AD
2084 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2085 * @state: powergating state (gate or ungate)
2086 *
2087 * Sets the requested powergating state for all instances of
2088 * the hardware IP specified.
2089 * Returns the error code from the last instance.
2090 */
43fa561f 2091int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
2092 enum amd_ip_block_type block_type,
2093 enum amd_powergating_state state)
d38ceaf9 2094{
43fa561f 2095 struct amdgpu_device *adev = dev;
d38ceaf9
AD
2096 int i, r = 0;
2097
2098 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2099 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 2100 continue;
c722865a
RZ
2101 if (adev->ip_blocks[i].version->type != block_type)
2102 continue;
2103 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2104 continue;
2105 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2106 (void *)adev, state);
2107 if (r)
2108 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2109 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
2110 }
2111 return r;
2112}
2113
e3ecdffa
AD
2114/**
2115 * amdgpu_device_ip_get_clockgating_state - get the CG state
2116 *
2117 * @adev: amdgpu_device pointer
2118 * @flags: clockgating feature flags
2119 *
2120 * Walks the list of IPs on the device and updates the clockgating
2121 * flags for each IP.
2122 * Updates @flags with the feature flags for each hardware IP where
2123 * clockgating is enabled.
2124 */
2990a1fc 2125void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 2126 u64 *flags)
6cb2d4e4
HR
2127{
2128 int i;
2129
2130 for (i = 0; i < adev->num_ip_blocks; i++) {
2131 if (!adev->ip_blocks[i].status.valid)
2132 continue;
2133 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2134 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2135 }
2136}
2137
e3ecdffa
AD
2138/**
2139 * amdgpu_device_ip_wait_for_idle - wait for idle
2140 *
2141 * @adev: amdgpu_device pointer
2142 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2143 *
2144	 * Waits for the requested hardware IP to be idle.
2145 * Returns 0 for success or a negative error code on failure.
2146 */
2990a1fc
AD
2147int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2148 enum amd_ip_block_type block_type)
5dbbb60b
AD
2149{
2150 int i, r;
2151
2152 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2153 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 2154 continue;
a1255107
AD
2155 if (adev->ip_blocks[i].version->type == block_type) {
2156 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
2157 if (r)
2158 return r;
2159 break;
2160 }
2161 }
2162 return 0;
2163
2164}
2165
e3ecdffa
AD
2166/**
2167 * amdgpu_device_ip_is_idle - is the hardware IP idle
2168 *
2169 * @adev: amdgpu_device pointer
2170 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2171 *
2172 * Check if the hardware IP is idle or not.
2173	 * Returns true if the IP is idle, false if not.
2174 */
2990a1fc
AD
2175bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2176 enum amd_ip_block_type block_type)
5dbbb60b
AD
2177{
2178 int i;
2179
2180 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2181 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 2182 continue;
a1255107
AD
2183 if (adev->ip_blocks[i].version->type == block_type)
2184 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
2185 }
2186 return true;
2187
2188}
2189
e3ecdffa
AD
2190/**
2191 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2192 *
2193 * @adev: amdgpu_device pointer
87e3f136 2194 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
2195 *
2196 * Returns a pointer to the hardware IP block structure
2197 * if it exists for the asic, otherwise NULL.
2198 */
2990a1fc
AD
2199struct amdgpu_ip_block *
2200amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2201 enum amd_ip_block_type type)
d38ceaf9
AD
2202{
2203 int i;
2204
2205 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 2206 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
2207 return &adev->ip_blocks[i];
2208
2209 return NULL;
2210}
2211
2212/**
2990a1fc 2213 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
2214 *
2215 * @adev: amdgpu_device pointer
5fc3aeeb 2216 * @type: enum amd_ip_block_type
d38ceaf9
AD
2217 * @major: major version
2218 * @minor: minor version
2219 *
2220	 * Return 0 if the IP block version is equal to or greater than the
2221	 * requested version, 1 if it is smaller or the ip_block doesn't exist.
2222 */
2990a1fc
AD
2223int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2224 enum amd_ip_block_type type,
2225 u32 major, u32 minor)
d38ceaf9 2226{
2990a1fc 2227 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 2228
a1255107
AD
2229 if (ip_block && ((ip_block->version->major > major) ||
2230 ((ip_block->version->major == major) &&
2231 (ip_block->version->minor >= minor))))
d38ceaf9
AD
2232 return 0;
2233
2234 return 1;
2235}
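
/*
 * Illustrative sketch (not part of the driver): because the helper above
 * returns 0 for "equal or newer" and 1 otherwise, a version gate reads
 * slightly inverted at the call site.  The wrapper name is hypothetical.
 */
static bool __maybe_unused amdgpu_example_has_gfx_v8_or_newer(struct amdgpu_device *adev)
{
	return amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
						  8, 0) == 0;
}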
2236
a1255107 2237/**
2990a1fc 2238 * amdgpu_device_ip_block_add
a1255107
AD
2239 *
2240 * @adev: amdgpu_device pointer
2241 * @ip_block_version: pointer to the IP to add
2242 *
2243 * Adds the IP block driver information to the collection of IPs
2244 * on the asic.
2245 */
2990a1fc
AD
2246int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2247 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
2248{
2249 if (!ip_block_version)
2250 return -EINVAL;
2251
7bd939d0
LG
2252 switch (ip_block_version->type) {
2253 case AMD_IP_BLOCK_TYPE_VCN:
2254 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2255 return 0;
2256 break;
2257 case AMD_IP_BLOCK_TYPE_JPEG:
2258 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2259 return 0;
2260 break;
2261 default:
2262 break;
2263 }
2264
e966a725 2265 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
2266 ip_block_version->funcs->name);
2267
a1255107
AD
2268 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2269
2270 return 0;
2271}
2272
e3ecdffa
AD
2273/**
2274 * amdgpu_device_enable_virtual_display - enable virtual display feature
2275 *
2276 * @adev: amdgpu_device pointer
2277 *
2278	 * Enables the virtual display feature if the user has enabled it via
2279	 * the module parameter virtual_display. This feature provides virtual
2280	 * display hardware on headless boards or in virtualized environments.
2281	 * This function parses and validates the configuration string specified by
2282	 * the user and configures the virtual display configuration (number of
2283	 * virtual connectors, crtcs, etc.) specified.
2284 */
483ef985 2285static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
2286{
2287 adev->enable_virtual_display = false;
2288
2289 if (amdgpu_virtual_display) {
8f66090b 2290 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 2291 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
2292
2293 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2294 pciaddstr_tmp = pciaddstr;
0f66356d
ED
2295 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2296 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
2297 if (!strcmp("all", pciaddname)
2298 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
2299 long num_crtc;
2300 int res = -1;
2301
9accf2fd 2302 adev->enable_virtual_display = true;
0f66356d
ED
2303
2304 if (pciaddname_tmp)
2305 res = kstrtol(pciaddname_tmp, 10,
2306 &num_crtc);
2307
2308 if (!res) {
2309 if (num_crtc < 1)
2310 num_crtc = 1;
2311 if (num_crtc > 6)
2312 num_crtc = 6;
2313 adev->mode_info.num_crtc = num_crtc;
2314 } else {
2315 adev->mode_info.num_crtc = 1;
2316 }
9accf2fd
ED
2317 break;
2318 }
2319 }
2320
0f66356d
ED
2321 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2322 amdgpu_virtual_display, pci_address_name,
2323 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
2324
2325 kfree(pciaddstr);
2326 }
2327}
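
/*
 * Illustrative examples (not part of the driver): based on the parsing above,
 * the virtual_display module parameter is a ';'-separated list of PCI
 * addresses, each optionally followed by ",<num_crtc>" (clamped to 1..6), with
 * "all" matching every device.  The PCI addresses below are placeholders:
 *
 *	amdgpu.virtual_display=0000:01:00.0,2
 *	amdgpu.virtual_display=0000:01:00.0;0000:02:00.0,4
 *	amdgpu.virtual_display=all,1
 */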
2328
25263da3
AD
2329void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2330{
2331 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2332 adev->mode_info.num_crtc = 1;
2333 adev->enable_virtual_display = true;
2334 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2335 adev->enable_virtual_display, adev->mode_info.num_crtc);
2336 }
2337}
2338
e3ecdffa
AD
2339/**
2340 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2341 *
2342 * @adev: amdgpu_device pointer
2343 *
2344 * Parses the asic configuration parameters specified in the gpu info
2345	 * firmware and makes them available to the driver for use in configuring
2346 * the asic.
2347 * Returns 0 on success, -EINVAL on failure.
2348 */
e2a75f88
AD
2349static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2350{
e2a75f88 2351 const char *chip_name;
c0a43457 2352 char fw_name[40];
e2a75f88
AD
2353 int err;
2354 const struct gpu_info_firmware_header_v1_0 *hdr;
2355
ab4fe3e1
HR
2356 adev->firmware.gpu_info_fw = NULL;
2357
fb915c87
AD
2358 if (adev->mman.discovery_bin)
2359 return 0;
258620d0 2360
e2a75f88 2361 switch (adev->asic_type) {
e2a75f88
AD
2362 default:
2363 return 0;
2364 case CHIP_VEGA10:
2365 chip_name = "vega10";
2366 break;
3f76dced
AD
2367 case CHIP_VEGA12:
2368 chip_name = "vega12";
2369 break;
2d2e5e7e 2370 case CHIP_RAVEN:
54f78a76 2371 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2372 chip_name = "raven2";
54f78a76 2373 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2374 chip_name = "picasso";
54c4d17e
FX
2375 else
2376 chip_name = "raven";
2d2e5e7e 2377 break;
65e60f6e
LM
2378 case CHIP_ARCTURUS:
2379 chip_name = "arcturus";
2380 break;
42b325e5
XY
2381 case CHIP_NAVI12:
2382 chip_name = "navi12";
2383 break;
e2a75f88
AD
2384 }
2385
2386 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2387 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2388 if (err) {
2389 dev_err(adev->dev,
b31d3063 2390 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2391 fw_name);
2392 goto out;
2393 }
2394
ab4fe3e1 2395 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2396 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2397
2398 switch (hdr->version_major) {
2399 case 1:
2400 {
2401 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2402 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2403 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2404
cc375d8c
TY
2405 /*
2406	 * Should be dropped when DAL no longer needs it.
2407 */
2408 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2409 goto parse_soc_bounding_box;
2410
b5ab16bf
AD
2411 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2412 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2413 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2414 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2415 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2416 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2417 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2418 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2419 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2420 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2421 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2422 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2423 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2424 adev->gfx.cu_info.max_waves_per_simd =
2425 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2426 adev->gfx.cu_info.max_scratch_slots_per_cu =
2427 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2428 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2429 if (hdr->version_minor >= 1) {
35c2e910
HZ
2430 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2431 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2432 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2433 adev->gfx.config.num_sc_per_sh =
2434 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2435 adev->gfx.config.num_packer_per_sc =
2436 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2437 }
ec51d3fa
XY
2438
2439parse_soc_bounding_box:
ec51d3fa
XY
2440 /*
2441	 * soc bounding box info is not integrated into the discovery table,
258620d0 2442	 * so we always need to parse it from the gpu info firmware if needed.
ec51d3fa 2443 */
48321c3d
HW
2444 if (hdr->version_minor == 2) {
2445 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2446 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2447 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2448 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2449 }
e2a75f88
AD
2450 break;
2451 }
2452 default:
2453 dev_err(adev->dev,
2454 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2455 err = -EINVAL;
2456 goto out;
2457 }
2458out:
e2a75f88
AD
2459 return err;
2460}
2461
e3ecdffa
AD
2462/**
2463 * amdgpu_device_ip_early_init - run early init for hardware IPs
2464 *
2465 * @adev: amdgpu_device pointer
2466 *
2467 * Early initialization pass for hardware IPs. The hardware IPs that make
2468	 * up each asic are discovered and each IP's early_init callback is run. This
2469 * is the first stage in initializing the asic.
2470 * Returns 0 on success, negative error code on failure.
2471 */
06ec9070 2472static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2473{
901e2be2 2474 struct pci_dev *parent;
aaa36a97 2475 int i, r;
ced69502 2476 bool total;
d38ceaf9 2477
483ef985 2478 amdgpu_device_enable_virtual_display(adev);
a6be7570 2479
00a979f3 2480 if (amdgpu_sriov_vf(adev)) {
00a979f3 2481 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2482 if (r)
2483 return r;
00a979f3
WS
2484 }
2485
d38ceaf9 2486 switch (adev->asic_type) {
33f34802
KW
2487#ifdef CONFIG_DRM_AMDGPU_SI
2488 case CHIP_VERDE:
2489 case CHIP_TAHITI:
2490 case CHIP_PITCAIRN:
2491 case CHIP_OLAND:
2492 case CHIP_HAINAN:
295d0daf 2493 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2494 r = si_set_ip_blocks(adev);
2495 if (r)
2496 return r;
2497 break;
2498#endif
a2e73f56
AD
2499#ifdef CONFIG_DRM_AMDGPU_CIK
2500 case CHIP_BONAIRE:
2501 case CHIP_HAWAII:
2502 case CHIP_KAVERI:
2503 case CHIP_KABINI:
2504 case CHIP_MULLINS:
e1ad2d53 2505 if (adev->flags & AMD_IS_APU)
a2e73f56 2506 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2507 else
2508 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2509
2510 r = cik_set_ip_blocks(adev);
2511 if (r)
2512 return r;
2513 break;
2514#endif
da87c30b
AD
2515 case CHIP_TOPAZ:
2516 case CHIP_TONGA:
2517 case CHIP_FIJI:
2518 case CHIP_POLARIS10:
2519 case CHIP_POLARIS11:
2520 case CHIP_POLARIS12:
2521 case CHIP_VEGAM:
2522 case CHIP_CARRIZO:
2523 case CHIP_STONEY:
2524 if (adev->flags & AMD_IS_APU)
2525 adev->family = AMDGPU_FAMILY_CZ;
2526 else
2527 adev->family = AMDGPU_FAMILY_VI;
2528
2529 r = vi_set_ip_blocks(adev);
2530 if (r)
2531 return r;
2532 break;
d38ceaf9 2533 default:
63352b7f
AD
2534 r = amdgpu_discovery_set_ip_blocks(adev);
2535 if (r)
2536 return r;
2537 break;
d38ceaf9
AD
2538 }
2539
901e2be2
AD
2540 if (amdgpu_has_atpx() &&
2541 (amdgpu_is_atpx_hybrid() ||
2542 amdgpu_has_atpx_dgpu_power_cntl()) &&
2543 ((adev->flags & AMD_IS_APU) == 0) &&
7b1c6263 2544 !dev_is_removable(&adev->pdev->dev))
901e2be2
AD
2545 adev->flags |= AMD_IS_PX;
2546
85ac2021 2547 if (!(adev->flags & AMD_IS_APU)) {
c4c8955b 2548 parent = pcie_find_root_port(adev->pdev);
85ac2021
AD
2549 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2550 }
901e2be2 2551
1884734a 2552
3b94fb10 2553 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2554 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2555 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2556 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2557 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
d9b3a066 2558 if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
fbf1035b 2559 adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
00f54b97 2560
ced69502 2561 total = true;
d38ceaf9
AD
2562 for (i = 0; i < adev->num_ip_blocks; i++) {
2563 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2564 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2565 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2566 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2567 } else {
a1255107
AD
2568 if (adev->ip_blocks[i].version->funcs->early_init) {
2569 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2570 if (r == -ENOENT) {
a1255107 2571 adev->ip_blocks[i].status.valid = false;
2c1a2784 2572 } else if (r) {
a1255107
AD
2573 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2574 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2575 total = false;
2c1a2784 2576 } else {
a1255107 2577 adev->ip_blocks[i].status.valid = true;
2c1a2784 2578 }
974e6b64 2579 } else {
a1255107 2580 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2581 }
d38ceaf9 2582 }
21a249ca
AD
2583 /* get the vbios after the asic_funcs are set up */
2584 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2585 r = amdgpu_device_parse_gpu_info_fw(adev);
2586 if (r)
2587 return r;
2588
21a249ca 2589 /* Read BIOS */
9535a86a
SZ
2590 if (amdgpu_device_read_bios(adev)) {
2591 if (!amdgpu_get_bios(adev))
2592 return -EINVAL;
21a249ca 2593
9535a86a
SZ
2594 r = amdgpu_atombios_init(adev);
2595 if (r) {
2596 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2597 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2598 return r;
2599 }
21a249ca 2600 }
77eabc6f
PJZ
2601
2602 /*get pf2vf msg info at it's earliest time*/
2603 if (amdgpu_sriov_vf(adev))
2604 amdgpu_virt_init_data_exchange(adev);
2605
21a249ca 2606 }
d38ceaf9 2607 }
ced69502
ML
2608 if (!total)
2609 return -ENODEV;
d38ceaf9 2610
00fa4035 2611 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2612 adev->cg_flags &= amdgpu_cg_mask;
2613 adev->pg_flags &= amdgpu_pg_mask;
2614
d38ceaf9
AD
2615 return 0;
2616}
2617
0a4f2520
RZ
2618static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2619{
2620 int i, r;
2621
2622 for (i = 0; i < adev->num_ip_blocks; i++) {
2623 if (!adev->ip_blocks[i].status.sw)
2624 continue;
2625 if (adev->ip_blocks[i].status.hw)
2626 continue;
2627 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2628 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2629 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2630 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2631 if (r) {
2632 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2633 adev->ip_blocks[i].version->funcs->name, r);
2634 return r;
2635 }
2636 adev->ip_blocks[i].status.hw = true;
2637 }
2638 }
2639
2640 return 0;
2641}
2642
2643static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2644{
2645 int i, r;
2646
2647 for (i = 0; i < adev->num_ip_blocks; i++) {
2648 if (!adev->ip_blocks[i].status.sw)
2649 continue;
2650 if (adev->ip_blocks[i].status.hw)
2651 continue;
2652 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2653 if (r) {
2654 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2655 adev->ip_blocks[i].version->funcs->name, r);
2656 return r;
2657 }
2658 adev->ip_blocks[i].status.hw = true;
2659 }
2660
2661 return 0;
2662}
2663
7a3e0bb2
RZ
2664static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2665{
2666 int r = 0;
2667 int i;
80f41f84 2668 uint32_t smu_version;
7a3e0bb2
RZ
2669
2670 if (adev->asic_type >= CHIP_VEGA10) {
2671 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2672 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2673 continue;
2674
e3c1b071 2675 if (!adev->ip_blocks[i].status.sw)
2676 continue;
2677
482f0e53
ML
2678	 /* no need to do the fw loading again if already done */
2679 if (adev->ip_blocks[i].status.hw == true)
2680 break;
2681
53b3f8f4 2682 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2683 r = adev->ip_blocks[i].version->funcs->resume(adev);
2684 if (r) {
2685 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2686 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2687 return r;
2688 }
2689 } else {
2690 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2691 if (r) {
2692 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2693 adev->ip_blocks[i].version->funcs->name, r);
2694 return r;
7a3e0bb2 2695 }
7a3e0bb2 2696 }
482f0e53
ML
2697
2698 adev->ip_blocks[i].status.hw = true;
2699 break;
7a3e0bb2
RZ
2700 }
2701 }
482f0e53 2702
8973d9ec
ED
2703 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2704 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2705
80f41f84 2706 return r;
7a3e0bb2
RZ
2707}
2708
5fd8518d
AG
2709static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2710{
2711 long timeout;
2712 int r, i;
2713
2714 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2715 struct amdgpu_ring *ring = adev->rings[i];
2716
2717	 /* No need to set up the GPU scheduler for rings that don't need it */
2718 if (!ring || ring->no_scheduler)
2719 continue;
2720
2721 switch (ring->funcs->type) {
2722 case AMDGPU_RING_TYPE_GFX:
2723 timeout = adev->gfx_timeout;
2724 break;
2725 case AMDGPU_RING_TYPE_COMPUTE:
2726 timeout = adev->compute_timeout;
2727 break;
2728 case AMDGPU_RING_TYPE_SDMA:
2729 timeout = adev->sdma_timeout;
2730 break;
2731 default:
2732 timeout = adev->video_timeout;
2733 break;
2734 }
2735
a6149f03 2736 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
56e44960 2737 DRM_SCHED_PRIORITY_COUNT,
11f25c84 2738 ring->num_hw_submission, 0,
8ab62eda
JG
2739 timeout, adev->reset_domain->wq,
2740 ring->sched_score, ring->name,
2741 adev->dev);
5fd8518d
AG
2742 if (r) {
2743 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2744 ring->name);
2745 return r;
2746 }
037b98a2
AD
2747 r = amdgpu_uvd_entity_init(adev, ring);
2748 if (r) {
2749 DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2750 ring->name);
2751 return r;
2752 }
2753 r = amdgpu_vce_entity_init(adev, ring);
2754 if (r) {
2755 DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2756 ring->name);
2757 return r;
2758 }
5fd8518d
AG
2759 }
2760
d425c6f4
JZ
2761 amdgpu_xcp_update_partition_sched_list(adev);
2762
5fd8518d
AG
2763 return 0;
2764}
2765
2766
e3ecdffa
AD
2767/**
2768 * amdgpu_device_ip_init - run init for hardware IPs
2769 *
2770 * @adev: amdgpu_device pointer
2771 *
2772 * Main initialization pass for hardware IPs. The list of all the hardware
2773 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2774 * are run. sw_init initializes the software state associated with each IP
2775 * and hw_init initializes the hardware associated with each IP.
2776 * Returns 0 on success, negative error code on failure.
2777 */
06ec9070 2778static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2779{
2780 int i, r;
2781
c030f2e4 2782 r = amdgpu_ras_init(adev);
2783 if (r)
2784 return r;
2785
d38ceaf9 2786 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2787 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2788 continue;
a1255107 2789 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2790 if (r) {
a1255107
AD
2791 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2792 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2793 goto init_failed;
2c1a2784 2794 }
a1255107 2795 adev->ip_blocks[i].status.sw = true;
bfca0289 2796
c1c39032
AD
2797 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2798 /* need to do common hw init early so everything is set up for gmc */
2799 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2800 if (r) {
2801 DRM_ERROR("hw_init %d failed %d\n", i, r);
2802 goto init_failed;
2803 }
2804 adev->ip_blocks[i].status.hw = true;
2805 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2806 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2807 /* Try to reserve bad pages early */
2808 if (amdgpu_sriov_vf(adev))
2809 amdgpu_virt_exchange_data(adev);
2810
7ccfd79f 2811 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2812 if (r) {
7ccfd79f 2813 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2814 goto init_failed;
2c1a2784 2815 }
a1255107 2816 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2817 if (r) {
2818 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2819 goto init_failed;
2c1a2784 2820 }
06ec9070 2821 r = amdgpu_device_wb_init(adev);
2c1a2784 2822 if (r) {
06ec9070 2823 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2824 goto init_failed;
2c1a2784 2825 }
a1255107 2826 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2827
2828 /* right after GMC hw init, we create CSA */
02ff519e 2829 if (adev->gfx.mcbp) {
1e256e27 2830 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2831 AMDGPU_GEM_DOMAIN_VRAM |
2832 AMDGPU_GEM_DOMAIN_GTT,
2833 AMDGPU_CSA_SIZE);
2493664f
ML
2834 if (r) {
2835 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2836 goto init_failed;
2493664f
ML
2837 }
2838 }
c8031019
APS
2839
2840 r = amdgpu_seq64_init(adev);
2841 if (r) {
2842 DRM_ERROR("allocate seq64 failed %d\n", r);
2843 goto init_failed;
2844 }
d38ceaf9
AD
2845 }
2846 }
2847
c9ffa427 2848 if (amdgpu_sriov_vf(adev))
22c16d25 2849 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2850
533aed27
AG
2851 r = amdgpu_ib_pool_init(adev);
2852 if (r) {
2853 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2854 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2855 goto init_failed;
2856 }
2857
c8963ea4
RZ
2858 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2859 if (r)
72d3f592 2860 goto init_failed;
0a4f2520
RZ
2861
2862 r = amdgpu_device_ip_hw_init_phase1(adev);
2863 if (r)
72d3f592 2864 goto init_failed;
0a4f2520 2865
7a3e0bb2
RZ
2866 r = amdgpu_device_fw_loading(adev);
2867 if (r)
72d3f592 2868 goto init_failed;
7a3e0bb2 2869
0a4f2520
RZ
2870 r = amdgpu_device_ip_hw_init_phase2(adev);
2871 if (r)
72d3f592 2872 goto init_failed;
d38ceaf9 2873
121a2bc6
AG
2874 /*
2875	 * retired pages will be loaded from eeprom and reserved here;
2876	 * this should be called after amdgpu_device_ip_hw_init_phase2 since
2877	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2878	 * functional for I2C communication, which is only true at this point.
b82e65a9
GC
2879 *
2880	 * amdgpu_ras_recovery_init may fail, but the upper layers only care
2881	 * about failures caused by a bad gpu situation and stop the amdgpu init
2882	 * process accordingly. For other failure cases, it will still release all
2883	 * the resources and print an error message, rather than returning a
2884	 * negative value to the upper level.
121a2bc6
AG
2885 *
2886 * Note: theoretically, this should be called before all vram allocations
2887	 * to protect retired pages from being abused
2888 */
b82e65a9
GC
2889 r = amdgpu_ras_recovery_init(adev);
2890 if (r)
2891 goto init_failed;
121a2bc6 2892
cfbb6b00
AG
2893 /**
2894 * In case of XGMI grab extra reference for reset domain for this device
2895 */
a4c63caf 2896 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2897 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2898 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2899 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2900
dfd0287b
LH
2901 if (WARN_ON(!hive)) {
2902 r = -ENOENT;
2903 goto init_failed;
2904 }
2905
46c67660 2906 if (!hive->reset_domain ||
2907 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2908 r = -ENOENT;
2909 amdgpu_put_xgmi_hive(hive);
2910 goto init_failed;
2911 }
2912
2913 /* Drop the early temporary reset domain we created for device */
2914 amdgpu_reset_put_reset_domain(adev->reset_domain);
2915 adev->reset_domain = hive->reset_domain;
9dfa4860 2916 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2917 }
a4c63caf
AG
2918 }
2919 }
2920
5fd8518d
AG
2921 r = amdgpu_device_init_schedulers(adev);
2922 if (r)
2923 goto init_failed;
e3c1b071 2924
b7043800
AD
2925 if (adev->mman.buffer_funcs_ring->sched.ready)
2926 amdgpu_ttm_set_buffer_funcs_status(adev, true);
2927
e3c1b071 2928 /* Don't init kfd if whole hive need to be reset during init */
84b4dd3f
PY
2929 if (!adev->gmc.xgmi.pending_reset) {
2930 kgd2kfd_init_zone_device(adev);
e3c1b071 2931 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2932 }
c6332b97 2933
bd607166
KR
2934 amdgpu_fru_get_product_info(adev);
2935
72d3f592 2936init_failed:
c6332b97 2937
72d3f592 2938 return r;
d38ceaf9
AD
2939}
2940
e3ecdffa
AD
2941/**
2942 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2943 *
2944 * @adev: amdgpu_device pointer
2945 *
2946 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2947 * this function before a GPU reset. If the value is retained after a
2948	 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2949 */
06ec9070 2950static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2951{
2952 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2953}
2954
e3ecdffa
AD
2955/**
2956 * amdgpu_device_check_vram_lost - check if vram is valid
2957 *
2958 * @adev: amdgpu_device pointer
2959 *
2960 * Checks the reset magic value written to the gart pointer in VRAM.
2961 * The driver calls this after a GPU reset to see if the contents of
2962	 * VRAM are lost or not.
2963	 * Returns true if vram is lost, false if not.
2964 */
06ec9070 2965static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2966{
dadce777
EQ
2967 if (memcmp(adev->gart.ptr, adev->reset_magic,
2968 AMDGPU_RESET_MAGIC_NUM))
2969 return true;
2970
53b3f8f4 2971 if (!amdgpu_in_reset(adev))
dadce777
EQ
2972 return false;
2973
2974 /*
2975 * For all ASICs with baco/mode1 reset, the VRAM is
2976 * always assumed to be lost.
2977 */
2978 switch (amdgpu_asic_reset_method(adev)) {
2979 case AMD_RESET_METHOD_BACO:
2980 case AMD_RESET_METHOD_MODE1:
2981 return true;
2982 default:
2983 return false;
2984 }
0c49e0b8
CZ
2985}
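
/*
 * Illustrative sketch (not part of the driver): the magic written by
 * amdgpu_device_fill_reset_magic() before a reset is compared back by
 * amdgpu_device_check_vram_lost() afterwards; a recovery path is shaped
 * roughly like the hypothetical helper below.
 */
static void __maybe_unused amdgpu_example_vram_lost_check(struct amdgpu_device *adev)
{
	amdgpu_device_fill_reset_magic(adev);
	/* ... the ASIC reset happens here ... */
	if (amdgpu_device_check_vram_lost(adev))
		dev_warn(adev->dev, "VRAM contents lost, buffers must be revalidated\n");
}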
2986
e3ecdffa 2987/**
1112a46b 2988 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2989 *
2990 * @adev: amdgpu_device pointer
b8b72130 2991 * @state: clockgating state (gate or ungate)
e3ecdffa 2992 *
e3ecdffa 2993 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2994 * set_clockgating_state callbacks are run.
2995	 * For late initialization, pass the state that enables clockgating for hardware IPs.
2996	 * For fini or suspend, pass the state that disables clockgating for hardware IPs.
e3ecdffa
AD
2997 * Returns 0 on success, negative error code on failure.
2998 */
fdd34271 2999
5d89bb2d
LL
3000int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
3001 enum amd_clockgating_state state)
d38ceaf9 3002{
1112a46b 3003 int i, j, r;
d38ceaf9 3004
4a2ba394
SL
3005 if (amdgpu_emu_mode == 1)
3006 return 0;
3007
1112a46b
RZ
3008 for (j = 0; j < adev->num_ip_blocks; j++) {
3009 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 3010 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 3011 continue;
47198eb7 3012 /* skip CG for GFX, SDMA on S0ix */
5d70a549 3013 if (adev->in_s0ix &&
47198eb7
AD
3014 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3015 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 3016 continue;
4a446d55 3017 /* skip CG for VCE/UVD, it's handled specially */
a1255107 3018 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 3019 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 3020 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 3021 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 3022 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 3023 /* enable clockgating to save power */
a1255107 3024 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 3025 state);
4a446d55
AD
3026 if (r) {
3027 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 3028 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
3029 return r;
3030 }
b0b00ff1 3031 }
d38ceaf9 3032 }
06b18f61 3033
c9f96fd5
RZ
3034 return 0;
3035}
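
/*
 * Illustrative note (not part of the driver): the index mapping above walks
 * the IP list front-to-back when gating (AMD_CG_STATE_GATE) and back-to-front
 * when ungating, so clockgating is torn down in the reverse of the order it
 * was applied.  For example, with 4 IP blocks, j = 0,1,2,3 maps to
 * i = 0,1,2,3 when gating and to i = 3,2,1,0 when ungating.
 */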
3036
5d89bb2d
LL
3037int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
3038 enum amd_powergating_state state)
c9f96fd5 3039{
1112a46b 3040 int i, j, r;
06b18f61 3041
c9f96fd5
RZ
3042 if (amdgpu_emu_mode == 1)
3043 return 0;
3044
1112a46b
RZ
3045 for (j = 0; j < adev->num_ip_blocks; j++) {
3046 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 3047 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 3048 continue;
47198eb7 3049 /* skip PG for GFX, SDMA on S0ix */
5d70a549 3050 if (adev->in_s0ix &&
47198eb7
AD
3051 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3052 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 3053 continue;
c9f96fd5
RZ
3054	 /* skip PG for VCE/UVD, it's handled specially */
3055 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3056 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3057 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 3058 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
3059 adev->ip_blocks[i].version->funcs->set_powergating_state) {
3060 /* enable powergating to save power */
3061 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 3062 state);
c9f96fd5
RZ
3063 if (r) {
3064 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
3065 adev->ip_blocks[i].version->funcs->name, r);
3066 return r;
3067 }
3068 }
3069 }
2dc80b00
S
3070 return 0;
3071}
3072
beff74bc
AD
3073static int amdgpu_device_enable_mgpu_fan_boost(void)
3074{
3075 struct amdgpu_gpu_instance *gpu_ins;
3076 struct amdgpu_device *adev;
3077 int i, ret = 0;
3078
3079 mutex_lock(&mgpu_info.mutex);
3080
3081 /*
3082 * MGPU fan boost feature should be enabled
3083 * only when there are two or more dGPUs in
3084 * the system
3085 */
3086 if (mgpu_info.num_dgpu < 2)
3087 goto out;
3088
3089 for (i = 0; i < mgpu_info.num_dgpu; i++) {
3090 gpu_ins = &(mgpu_info.gpu_ins[i]);
3091 adev = gpu_ins->adev;
3092 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 3093 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
3094 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3095 if (ret)
3096 break;
3097
3098 gpu_ins->mgpu_fan_enabled = 1;
3099 }
3100 }
3101
3102out:
3103 mutex_unlock(&mgpu_info.mutex);
3104
3105 return ret;
3106}
3107
e3ecdffa
AD
3108/**
3109 * amdgpu_device_ip_late_init - run late init for hardware IPs
3110 *
3111 * @adev: amdgpu_device pointer
3112 *
3113 * Late initialization pass for hardware IPs. The list of all the hardware
3114 * IPs that make up the asic is walked and the late_init callbacks are run.
3115 * late_init covers any special initialization that an IP requires
3116 * after all of the have been initialized or something that needs to happen
3117 * late in the init process.
3118 * Returns 0 on success, negative error code on failure.
3119 */
06ec9070 3120static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 3121{
60599a03 3122 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
3123 int i = 0, r;
3124
3125 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 3126 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
3127 continue;
3128 if (adev->ip_blocks[i].version->funcs->late_init) {
3129 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
3130 if (r) {
3131 DRM_ERROR("late_init of IP block <%s> failed %d\n",
3132 adev->ip_blocks[i].version->funcs->name, r);
3133 return r;
3134 }
2dc80b00 3135 }
73f847db 3136 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
3137 }
3138
867e24ca 3139 r = amdgpu_ras_late_init(adev);
3140 if (r) {
3141 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
3142 return r;
3143 }
3144
a891d239
DL
3145 amdgpu_ras_set_error_query_ready(adev, true);
3146
1112a46b
RZ
3147 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3148 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 3149
06ec9070 3150 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 3151
beff74bc
AD
3152 r = amdgpu_device_enable_mgpu_fan_boost();
3153 if (r)
3154 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3155
4da8b639 3156 /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
47fc644f
SS
3157 if (amdgpu_passthrough(adev) &&
3158 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3159 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 3160 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
3161
3162 if (adev->gmc.xgmi.num_physical_nodes > 1) {
3163 mutex_lock(&mgpu_info.mutex);
3164
3165 /*
3166 * Reset device p-state to low as this was booted with high.
3167 *
3168 * This should be performed only after all devices from the same
3169 * hive get initialized.
3170 *
3171	 * However, it's unknown in advance how many devices are in the hive,
3172	 * as they are counted one by one during device initialization.
3173 *
3174	 * So, we wait for all XGMI interlinked devices to be initialized.
3175 * This may bring some delays as those devices may come from
3176 * different hives. But that should be OK.
3177 */
3178 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3179 for (i = 0; i < mgpu_info.num_gpu; i++) {
3180 gpu_instance = &(mgpu_info.gpu_ins[i]);
3181 if (gpu_instance->adev->flags & AMD_IS_APU)
3182 continue;
3183
d84a430d
JK
3184 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3185 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
3186 if (r) {
3187 DRM_ERROR("pstate setting failed (%d).\n", r);
3188 break;
3189 }
3190 }
3191 }
3192
3193 mutex_unlock(&mgpu_info.mutex);
3194 }
3195
d38ceaf9
AD
3196 return 0;
3197}
3198
613aa3ea
LY
3199/**
3200 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3201 *
3202 * @adev: amdgpu_device pointer
3203 *
3204	 * For ASICs that need to disable SMC first
3205 */
3206static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3207{
3208 int i, r;
3209
4e8303cf 3210 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
613aa3ea
LY
3211 return;
3212
3213 for (i = 0; i < adev->num_ip_blocks; i++) {
3214 if (!adev->ip_blocks[i].status.hw)
3215 continue;
3216 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3217 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3218 /* XXX handle errors */
3219 if (r) {
3220 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3221 adev->ip_blocks[i].version->funcs->name, r);
3222 }
3223 adev->ip_blocks[i].status.hw = false;
3224 break;
3225 }
3226 }
3227}
3228
e9669fb7 3229static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
3230{
3231 int i, r;
3232
e9669fb7
AG
3233 for (i = 0; i < adev->num_ip_blocks; i++) {
3234 if (!adev->ip_blocks[i].version->funcs->early_fini)
3235 continue;
5278a159 3236
e9669fb7
AG
3237 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
3238 if (r) {
3239 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3240 adev->ip_blocks[i].version->funcs->name, r);
3241 }
3242 }
c030f2e4 3243
05df1f01 3244 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
3245 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3246
7270e895
TY
3247 amdgpu_amdkfd_suspend(adev, false);
3248
613aa3ea
LY
3249	 /* Workaround for ASICs that need to disable SMC first */
3250 amdgpu_device_smu_fini_early(adev);
3e96dbfd 3251
d38ceaf9 3252 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3253 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 3254 continue;
8201a67a 3255
a1255107 3256 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 3257 /* XXX handle errors */
2c1a2784 3258 if (r) {
a1255107
AD
3259 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3260 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3261 }
8201a67a 3262
a1255107 3263 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
3264 }
3265
6effad8a
GC
3266 if (amdgpu_sriov_vf(adev)) {
3267 if (amdgpu_virt_release_full_gpu(adev, false))
3268 DRM_ERROR("failed to release exclusive mode on fini\n");
3269 }
3270
e9669fb7
AG
3271 return 0;
3272}
3273
3274/**
3275 * amdgpu_device_ip_fini - run fini for hardware IPs
3276 *
3277 * @adev: amdgpu_device pointer
3278 *
3279 * Main teardown pass for hardware IPs. The list of all the hardware
3280 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3281 * are run. hw_fini tears down the hardware associated with each IP
3282 * and sw_fini tears down any software state associated with each IP.
3283 * Returns 0 on success, negative error code on failure.
3284 */
3285static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3286{
3287 int i, r;
3288
3289 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3290 amdgpu_virt_release_ras_err_handler_data(adev);
3291
e9669fb7
AG
3292 if (adev->gmc.xgmi.num_physical_nodes > 1)
3293 amdgpu_xgmi_remove_device(adev);
3294
c004d44e 3295 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 3296
d38ceaf9 3297 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3298 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 3299 continue;
c12aba3a
ML
3300
3301 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 3302 amdgpu_ucode_free_bo(adev);
1e256e27 3303 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 3304 amdgpu_device_wb_fini(adev);
7ccfd79f 3305 amdgpu_device_mem_scratch_fini(adev);
533aed27 3306 amdgpu_ib_pool_fini(adev);
c8031019 3307 amdgpu_seq64_fini(adev);
c12aba3a
ML
3308 }
3309
a1255107 3310 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 3311 /* XXX handle errors */
2c1a2784 3312 if (r) {
a1255107
AD
3313 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3314 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3315 }
a1255107
AD
3316 adev->ip_blocks[i].status.sw = false;
3317 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
3318 }
3319
a6dcfd9c 3320 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3321 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 3322 continue;
a1255107
AD
3323 if (adev->ip_blocks[i].version->funcs->late_fini)
3324 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3325 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
3326 }
3327
c030f2e4 3328 amdgpu_ras_fini(adev);
3329
d38ceaf9
AD
3330 return 0;
3331}
3332
e3ecdffa 3333/**
beff74bc 3334 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 3335 *
1112a46b 3336 * @work: work_struct.
e3ecdffa 3337 */
beff74bc 3338static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
3339{
3340 struct amdgpu_device *adev =
beff74bc 3341 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
3342 int r;
3343
3344 r = amdgpu_ib_ring_tests(adev);
3345 if (r)
3346 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
3347}
3348
1e317b99
RZ
3349static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3350{
3351 struct amdgpu_device *adev =
3352 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3353
90a92662
MD
3354 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3355 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3356
3357 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3358 adev->gfx.gfx_off_state = true;
1e317b99
RZ
3359}
3360
e3ecdffa 3361/**
e7854a03 3362 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3363 *
3364 * @adev: amdgpu_device pointer
3365 *
3366 * Main suspend function for hardware IPs. The list of all the hardware
3367 * IPs that make up the asic is walked, clockgating is disabled and the
3368 * suspend callbacks are run. suspend puts the hardware and software state
3369 * in each IP into a state suitable for suspend.
3370 * Returns 0 on success, negative error code on failure.
3371 */
e7854a03
AD
3372static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3373{
3374 int i, r;
3375
50ec83f0
AD
3376 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3377 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3378
b31d6ada
EQ
3379 /*
3380	 * Per the PMFW team's suggestion, the driver needs to handle disabling
3381	 * the gfxoff and df cstate features for the gpu reset (e.g. Mode1Reset)
3382	 * scenario. Add the missing df cstate disablement here.
3383 */
3384 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3385 dev_warn(adev->dev, "Failed to disallow df cstate");
3386
e7854a03
AD
3387 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3388 if (!adev->ip_blocks[i].status.valid)
3389 continue;
2b9f7848 3390
e7854a03 3391 /* displays are handled separately */
2b9f7848
ND
3392 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3393 continue;
3394
3395 /* XXX handle errors */
3396 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3397 /* XXX handle errors */
3398 if (r) {
3399 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3400 adev->ip_blocks[i].version->funcs->name, r);
3401 return r;
e7854a03 3402 }
2b9f7848
ND
3403
3404 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3405 }
3406
e7854a03
AD
3407 return 0;
3408}
3409
3410/**
3411 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3412 *
3413 * @adev: amdgpu_device pointer
3414 *
3415 * Main suspend function for hardware IPs. The list of all the hardware
3416 * IPs that make up the asic is walked, clockgating is disabled and the
3417 * suspend callbacks are run. suspend puts the hardware and software state
3418 * in each IP into a state suitable for suspend.
3419 * Returns 0 on success, negative error code on failure.
3420 */
3421static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3422{
3423 int i, r;
3424
557f42a2 3425 if (adev->in_s0ix)
bc143d8b 3426 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3427
d38ceaf9 3428 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3429 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3430 continue;
e7854a03
AD
3431 /* displays are handled in phase1 */
3432 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3433 continue;
bff77e86
LM
3434 /* PSP lost connection when err_event_athub occurs */
3435 if (amdgpu_ras_intr_triggered() &&
3436 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3437 adev->ip_blocks[i].status.hw = false;
3438 continue;
3439 }
e3c1b071 3440
 3441		/* skip unnecessary suspend if the blocks have not been initialized yet */
3442 if (adev->gmc.xgmi.pending_reset &&
3443 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3444 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3445 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3446 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3447 adev->ip_blocks[i].status.hw = false;
3448 continue;
3449 }
557f42a2 3450
afa6646b 3451 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3452 * gfx is in gfxoff state, so on resume it will exit gfxoff just
 3453		 * like at runtime. PSP is also part of the always-on hardware,
 3454		 * so there is no need to suspend it.
3455 */
557f42a2 3456 if (adev->in_s0ix &&
32ff160d 3457 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3458 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3459 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3460 continue;
3461
2a7798ea
AD
3462 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3463 if (adev->in_s0ix &&
4e8303cf
LL
3464 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3465 IP_VERSION(5, 0, 0)) &&
3466 (adev->ip_blocks[i].version->type ==
3467 AMD_IP_BLOCK_TYPE_SDMA))
2a7798ea
AD
3468 continue;
3469
e11c7750
TH
 3470		/* During cold boot, swPSP provides the IMU and RLC FW binaries to the TOS.
 3471		 * These live in the TMR, so PSP-TOS is expected to reuse them from that
 3472		 * location on reload, and RLC autoload is also triggered from there based
 3473		 * on the PMFW -> PSP message during the re-init sequence.
 3474		 * Therefore, skip PSP suspend & resume to avoid destroying the TMR and
 3475		 * reloading the firmware again on IMU-enabled APU ASICs.
3476 */
3477 if (amdgpu_in_reset(adev) &&
3478 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3479 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3480 continue;
3481
d38ceaf9 3482 /* XXX handle errors */
a1255107 3483 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3484 /* XXX handle errors */
2c1a2784 3485 if (r) {
a1255107
AD
3486 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3487 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3488 }
876923fb 3489 adev->ip_blocks[i].status.hw = false;
a3a09142 3490 /* handle putting the SMC in the appropriate state */
47fc644f 3491 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3492 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3493 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3494 if (r) {
3495 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3496 adev->mp1_state, r);
3497 return r;
3498 }
a3a09142
AD
3499 }
3500 }
d38ceaf9
AD
3501 }
3502
3503 return 0;
3504}
3505
e7854a03
AD
3506/**
3507 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3508 *
3509 * @adev: amdgpu_device pointer
3510 *
3511 * Main suspend function for hardware IPs. The list of all the hardware
3512 * IPs that make up the asic is walked, clockgating is disabled and the
3513 * suspend callbacks are run. suspend puts the hardware and software state
3514 * in each IP into a state suitable for suspend.
3515 * Returns 0 on success, negative error code on failure.
3516 */
3517int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3518{
3519 int r;
3520
3c73683c
JC
3521 if (amdgpu_sriov_vf(adev)) {
3522 amdgpu_virt_fini_data_exchange(adev);
e7819644 3523 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3524 }
e7819644 3525
b7043800
AD
3526 amdgpu_ttm_set_buffer_funcs_status(adev, false);
3527
e7854a03
AD
3528 r = amdgpu_device_ip_suspend_phase1(adev);
3529 if (r)
3530 return r;
3531 r = amdgpu_device_ip_suspend_phase2(adev);
3532
e7819644
YT
3533 if (amdgpu_sriov_vf(adev))
3534 amdgpu_virt_release_full_gpu(adev, false);
3535
e7854a03
AD
3536 return r;
3537}
3538
06ec9070 3539static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3540{
3541 int i, r;
3542
2cb681b6 3543 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3544 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3545 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3546 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3547 AMD_IP_BLOCK_TYPE_IH,
3548 };
a90ad3c2 3549
95ea3dbc 3550 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3551 int j;
3552 struct amdgpu_ip_block *block;
a90ad3c2 3553
4cd2a96d
J
3554 block = &adev->ip_blocks[i];
3555 block->status.hw = false;
2cb681b6 3556
4cd2a96d 3557 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3558
4cd2a96d 3559 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3560 !block->status.valid)
3561 continue;
3562
3563 r = block->version->funcs->hw_init(adev);
0aaeefcc 3564			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
3565 if (r)
3566 return r;
482f0e53 3567 block->status.hw = true;
a90ad3c2
ML
3568 }
3569 }
3570
3571 return 0;
3572}
3573
06ec9070 3574static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3575{
3576 int i, r;
3577
2cb681b6
ML
3578 static enum amd_ip_block_type ip_order[] = {
3579 AMD_IP_BLOCK_TYPE_SMC,
3580 AMD_IP_BLOCK_TYPE_DCE,
3581 AMD_IP_BLOCK_TYPE_GFX,
3582 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3583 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3584 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3585 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3586 AMD_IP_BLOCK_TYPE_VCN,
3587 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3588 };
a90ad3c2 3589
2cb681b6
ML
3590 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3591 int j;
3592 struct amdgpu_ip_block *block;
a90ad3c2 3593
2cb681b6
ML
3594 for (j = 0; j < adev->num_ip_blocks; j++) {
3595 block = &adev->ip_blocks[j];
3596
3597 if (block->version->type != ip_order[i] ||
482f0e53
ML
3598 !block->status.valid ||
3599 block->status.hw)
2cb681b6
ML
3600 continue;
3601
895bd048
JZ
3602 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3603 r = block->version->funcs->resume(adev);
3604 else
3605 r = block->version->funcs->hw_init(adev);
3606
0aaeefcc 3607			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
3608 if (r)
3609 return r;
482f0e53 3610 block->status.hw = true;
a90ad3c2
ML
3611 }
3612 }
3613
3614 return 0;
3615}
3616
e3ecdffa
AD
3617/**
3618 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3619 *
3620 * @adev: amdgpu_device pointer
3621 *
3622 * First resume function for hardware IPs. The list of all the hardware
3623 * IPs that make up the asic is walked and the resume callbacks are run for
3624 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3625 * after a suspend and updates the software state as necessary. This
3626 * function is also used for restoring the GPU after a GPU reset.
3627 * Returns 0 on success, negative error code on failure.
3628 */
06ec9070 3629static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3630{
3631 int i, r;
3632
a90ad3c2 3633 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3634 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3635 continue;
a90ad3c2 3636 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3637 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3638 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3639 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3640
fcf0649f
CZ
3641 r = adev->ip_blocks[i].version->funcs->resume(adev);
3642 if (r) {
3643 DRM_ERROR("resume of IP block <%s> failed %d\n",
3644 adev->ip_blocks[i].version->funcs->name, r);
3645 return r;
3646 }
482f0e53 3647 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3648 }
3649 }
3650
3651 return 0;
3652}
3653
e3ecdffa
AD
3654/**
3655 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3656 *
3657 * @adev: amdgpu_device pointer
3658 *
 3659 * Second resume function for hardware IPs. The list of all the hardware
3660 * IPs that make up the asic is walked and the resume callbacks are run for
3661 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3662 * functional state after a suspend and updates the software state as
3663 * necessary. This function is also used for restoring the GPU after a GPU
3664 * reset.
3665 * Returns 0 on success, negative error code on failure.
3666 */
06ec9070 3667static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3668{
3669 int i, r;
3670
3671 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3672 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3673 continue;
fcf0649f 3674 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3675 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3676 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3677 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3678 continue;
a1255107 3679 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3680 if (r) {
a1255107
AD
3681 DRM_ERROR("resume of IP block <%s> failed %d\n",
3682 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3683 return r;
2c1a2784 3684 }
482f0e53 3685 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3686 }
3687
3688 return 0;
3689}
3690
e3ecdffa
AD
3691/**
3692 * amdgpu_device_ip_resume - run resume for hardware IPs
3693 *
3694 * @adev: amdgpu_device pointer
3695 *
3696 * Main resume function for hardware IPs. The hardware IPs
3697 * are split into two resume functions because they are
b8920e1e 3698 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
 3699 * steps need to be taken between them. In this case (S3/S4) they are
3700 * run sequentially.
3701 * Returns 0 on success, negative error code on failure.
3702 */
06ec9070 3703static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3704{
3705 int r;
3706
06ec9070 3707 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3708 if (r)
3709 return r;
7a3e0bb2
RZ
3710
3711 r = amdgpu_device_fw_loading(adev);
3712 if (r)
3713 return r;
3714
06ec9070 3715 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f 3716
b7043800
AD
3717 if (adev->mman.buffer_funcs_ring->sched.ready)
3718 amdgpu_ttm_set_buffer_funcs_status(adev, true);
3719
fcf0649f
CZ
3720 return r;
3721}
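
/*
 * Note on the split above: phase1 only brings back the blocks everything
 * else depends on (COMMON, GMC, IH, plus PSP under SR-IOV), firmware is
 * then (re)loaded via amdgpu_device_fw_loading(), and phase2 resumes the
 * remaining blocks that rely on that firmware and on a working GART and
 * interrupt setup.
 */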
3722
e3ecdffa
AD
3723/**
3724 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3725 *
3726 * @adev: amdgpu_device pointer
3727 *
3728 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3729 */
4e99a44e 3730static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3731{
6867e1b5
ML
3732 if (amdgpu_sriov_vf(adev)) {
3733 if (adev->is_atom_fw) {
58ff791a 3734 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3735 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3736 } else {
3737 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3738 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3739 }
3740
3741 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3742 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3743 }
048765ad
AR
3744}
3745
e3ecdffa
AD
3746/**
3747 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3748 *
3749 * @asic_type: AMD asic type
3750 *
 3751 * Check if there is DC (new modesetting infrastructure) support for an asic.
3752 * returns true if DC has support, false if not.
3753 */
4562236b
HW
3754bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3755{
3756 switch (asic_type) {
0637d417
AD
3757#ifdef CONFIG_DRM_AMDGPU_SI
3758 case CHIP_HAINAN:
3759#endif
3760 case CHIP_TOPAZ:
3761 /* chips with no display hardware */
3762 return false;
4562236b 3763#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3764 case CHIP_TAHITI:
3765 case CHIP_PITCAIRN:
3766 case CHIP_VERDE:
3767 case CHIP_OLAND:
2d32ffd6
AD
3768 /*
3769 * We have systems in the wild with these ASICs that require
3770 * LVDS and VGA support which is not supported with DC.
3771 *
3772 * Fallback to the non-DC driver here by default so as not to
3773 * cause regressions.
3774 */
3775#if defined(CONFIG_DRM_AMD_DC_SI)
3776 return amdgpu_dc > 0;
3777#else
3778 return false;
64200c46 3779#endif
4562236b 3780 case CHIP_BONAIRE:
0d6fbccb 3781 case CHIP_KAVERI:
367e6687
AD
3782 case CHIP_KABINI:
3783 case CHIP_MULLINS:
d9fda248
HW
3784 /*
3785 * We have systems in the wild with these ASICs that require
b5a0168e 3786 * VGA support which is not supported with DC.
d9fda248
HW
3787 *
3788 * Fallback to the non-DC driver here by default so as not to
3789 * cause regressions.
3790 */
3791 return amdgpu_dc > 0;
f7f12b25 3792 default:
fd187853 3793 return amdgpu_dc != 0;
f7f12b25 3794#else
4562236b 3795 default:
93b09a9a 3796 if (amdgpu_dc > 0)
b8920e1e 3797 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3798 return false;
f7f12b25 3799#endif
4562236b
HW
3800 }
3801}
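
/*
 * Usage note (illustrative, mirrors the checks above): booting with
 * amdgpu.dc=1 opts the legacy SI/CIK parts listed above into DC,
 * amdgpu.dc=0 forces the non-DC path on ASICs where DC is optional,
 * and the default (-1) keeps the per-ASIC choice made here.
 */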
3802
3803/**
3804 * amdgpu_device_has_dc_support - check if dc is supported
3805 *
982a820b 3806 * @adev: amdgpu_device pointer
4562236b
HW
3807 *
3808 * Returns true for supported, false for not supported
3809 */
3810bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3811{
25263da3 3812 if (adev->enable_virtual_display ||
abaf210c 3813 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3814 return false;
3815
4562236b
HW
3816 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3817}
3818
d4535e2c
AG
3819static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3820{
3821 struct amdgpu_device *adev =
3822 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3823 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3824
c6a6e2db
AG
3825 /* It's a bug to not have a hive within this function */
3826 if (WARN_ON(!hive))
3827 return;
3828
3829 /*
3830 * Use task barrier to synchronize all xgmi reset works across the
3831 * hive. task_barrier_enter and task_barrier_exit will block
3832 * until all the threads running the xgmi reset works reach
3833 * those points. task_barrier_full will do both blocks.
3834 */
3835 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3836
3837 task_barrier_enter(&hive->tb);
4a580877 3838 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3839
3840 if (adev->asic_reset_res)
3841 goto fail;
3842
3843 task_barrier_exit(&hive->tb);
4a580877 3844 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3845
3846 if (adev->asic_reset_res)
3847 goto fail;
43c4d576 3848
21226f02 3849 amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
c6a6e2db
AG
3850 } else {
3851
3852 task_barrier_full(&hive->tb);
3853 adev->asic_reset_res = amdgpu_asic_reset(adev);
3854 }
ce316fa5 3855
c6a6e2db 3856fail:
d4535e2c 3857 if (adev->asic_reset_res)
fed184e9 3858 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3859 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3860 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3861}
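
/*
 * Minimal sketch of the barrier pattern used above for a BACO reset across
 * an XGMI hive (illustrative only; enter_baco()/exit_baco() stand in for
 * the amdgpu_device_baco_enter/exit calls):
 *
 *	task_barrier_enter(&hive->tb);   // wait for every device's worker
 *	enter_baco(adev);                // all devices enter BACO together
 *	task_barrier_exit(&hive->tb);    // wait until everyone has entered
 *	exit_baco(adev);                 // then all devices exit BACO
 *
 * The hive's task barrier is set up elsewhere with task_barrier_init()
 * and one task_barrier_add_task() per device in the hive.
 */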
3862
71f98027
AD
3863static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3864{
3865 char *input = amdgpu_lockup_timeout;
3866 char *timeout_setting = NULL;
3867 int index = 0;
3868 long timeout;
3869 int ret = 0;
3870
3871 /*
67387dfe
AD
 3872	 * By default the timeout for non-compute jobs is 10000
 3873	 * and 60000 for compute jobs.
71f98027 3874	 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3875	 * jobs is 60000 by default.
71f98027
AD
3876 */
3877 adev->gfx_timeout = msecs_to_jiffies(10000);
3878 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3879 if (amdgpu_sriov_vf(adev))
3880 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3881 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3882 else
67387dfe 3883 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3884
f440ff44 3885 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3886 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3887 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3888 ret = kstrtol(timeout_setting, 0, &timeout);
3889 if (ret)
3890 return ret;
3891
3892 if (timeout == 0) {
3893 index++;
3894 continue;
3895 } else if (timeout < 0) {
3896 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3897 dev_warn(adev->dev, "lockup timeout disabled");
3898 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3899 } else {
3900 timeout = msecs_to_jiffies(timeout);
3901 }
3902
3903 switch (index++) {
3904 case 0:
3905 adev->gfx_timeout = timeout;
3906 break;
3907 case 1:
3908 adev->compute_timeout = timeout;
3909 break;
3910 case 2:
3911 adev->sdma_timeout = timeout;
3912 break;
3913 case 3:
3914 adev->video_timeout = timeout;
3915 break;
3916 default:
3917 break;
3918 }
3919 }
3920 /*
3921 * There is only one value specified and
3922 * it should apply to all non-compute jobs.
3923 */
bcccee89 3924 if (index == 1) {
71f98027 3925 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3926 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3927 adev->compute_timeout = adev->gfx_timeout;
3928 }
71f98027
AD
3929 }
3930
3931 return ret;
3932}
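
/*
 * Illustrative, userspace-only sketch (not driver code) of the
 * comma-separated parsing done above, showing how a string such as
 * "10000,60000,-1,5000" maps onto the gfx/compute/sdma/video slots.
 * The example string and the 4-entry limit are assumptions for the
 * sketch; the single-value special case is omitted for brevity.
 */
#if 0
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char input[] = "10000,60000,-1,5000";
	const char *names[] = { "gfx", "compute", "sdma", "video" };
	char *s = input, *tok;
	int index = 0;

	while ((tok = strsep(&s, ",")) && *tok && index < 4) {
		long timeout = strtol(tok, NULL, 0);

		if (timeout == 0) {
			/* 0 keeps the built-in default for this queue */
		} else if (timeout < 0) {
			/* a negative value disables the timeout */
			printf("%s: disabled\n", names[index]);
		} else {
			printf("%s: %ld ms\n", names[index], timeout);
		}
		index++;
	}
	return 0;
}
#endif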
d4535e2c 3933
4a74c38c
PY
3934/**
3935 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3936 *
3937 * @adev: amdgpu_device pointer
3938 *
 3939 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3940 */
3941static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3942{
3943 struct iommu_domain *domain;
3944
3945 domain = iommu_get_domain_for_dev(adev->dev);
3946 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3947 adev->ram_is_direct_mapped = true;
3948}
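
/*
 * Example (illustrative): booting without an IOMMU, or with the IOMMU in
 * passthrough mode (e.g. iommu=pt / iommu.passthrough=1), yields no domain
 * or an IOMMU_DOMAIN_IDENTITY domain, so ram_is_direct_mapped is set and
 * the driver can assume system RAM is not remapped on its way to the GPU.
 */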
3949
77f3a5cd 3950static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3951 &dev_attr_pcie_replay_count.attr,
3952 NULL
3953};
3954
02ff519e
AD
3955static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3956{
3957 if (amdgpu_mcbp == 1)
3958 adev->gfx.mcbp = true;
1e9e15dc
JZ
3959 else if (amdgpu_mcbp == 0)
3960 adev->gfx.mcbp = false;
50a7c876 3961
02ff519e
AD
3962 if (amdgpu_sriov_vf(adev))
3963 adev->gfx.mcbp = true;
3964
3965 if (adev->gfx.mcbp)
3966 DRM_INFO("MCBP is enabled\n");
3967}
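
/*
 * Usage note (matches the logic above): amdgpu.mcbp=1 forces mid-command-
 * buffer preemption on, amdgpu.mcbp=0 forces it off, and the default (-1)
 * leaves the decision to the driver; under SR-IOV it is always enabled
 * regardless of the parameter.
 */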
3968
d38ceaf9
AD
3969/**
3970 * amdgpu_device_init - initialize the driver
3971 *
3972 * @adev: amdgpu_device pointer
d38ceaf9
AD
3973 * @flags: driver flags
3974 *
3975 * Initializes the driver info and hw (all asics).
3976 * Returns 0 for success or an error on failure.
3977 * Called at driver startup.
3978 */
3979int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3980 uint32_t flags)
3981{
8aba21b7
LT
3982 struct drm_device *ddev = adev_to_drm(adev);
3983 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3984 int r, i;
b98c6299 3985 bool px = false;
95844d20 3986 u32 max_MBps;
59e9fff1 3987 int tmp;
d38ceaf9
AD
3988
3989 adev->shutdown = false;
d38ceaf9 3990 adev->flags = flags;
4e66d7d2
YZ
3991
3992 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3993 adev->asic_type = amdgpu_force_asic_type;
3994 else
3995 adev->asic_type = flags & AMD_ASIC_MASK;
3996
d38ceaf9 3997 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3998 if (amdgpu_emu_mode == 1)
8bdab6bb 3999 adev->usec_timeout *= 10;
770d13b1 4000 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
4001 adev->accel_working = false;
4002 adev->num_rings = 0;
68ce8b24 4003 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
4004 adev->mman.buffer_funcs = NULL;
4005 adev->mman.buffer_funcs_ring = NULL;
4006 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 4007 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 4008 adev->gmc.gmc_funcs = NULL;
7bd939d0 4009 adev->harvest_ip_mask = 0x0;
f54d1867 4010 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 4011 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
4012
4013 adev->smc_rreg = &amdgpu_invalid_rreg;
4014 adev->smc_wreg = &amdgpu_invalid_wreg;
4015 adev->pcie_rreg = &amdgpu_invalid_rreg;
4016 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
4017 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
4018 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
4019 adev->pciep_rreg = &amdgpu_invalid_rreg;
4020 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
4021 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
4022 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
a76b2870
CL
4023 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
4024 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
d38ceaf9
AD
4025 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
4026 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
4027 adev->didt_rreg = &amdgpu_invalid_rreg;
4028 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
4029 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
4030 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
4031 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
4032 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
4033
3e39ab90
AD
4034 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
4035 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
4036 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
4037
4038 /* mutex initialization are all done here so we
b8920e1e
SS
4039 * can recall function without having locking issues
4040 */
0e5ca0d1 4041 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
4042 mutex_init(&adev->pm.mutex);
4043 mutex_init(&adev->gfx.gpu_clock_mutex);
4044 mutex_init(&adev->srbm_mutex);
b8866c26 4045 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 4046 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 4047 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 4048 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 4049 mutex_init(&adev->mn_lock);
e23b74aa 4050 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 4051 hash_init(adev->mn_hash);
32eaeae0 4052 mutex_init(&adev->psp.mutex);
bd052211 4053 mutex_init(&adev->notifier_lock);
8cda7a4f 4054 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 4055 mutex_init(&adev->benchmark_mutex);
d38ceaf9 4056
ab3b9de6 4057 amdgpu_device_init_apu_flags(adev);
9f6a7857 4058
912dfc84
EQ
4059 r = amdgpu_device_check_arguments(adev);
4060 if (r)
4061 return r;
d38ceaf9 4062
d38ceaf9
AD
4063 spin_lock_init(&adev->mmio_idx_lock);
4064 spin_lock_init(&adev->smc_idx_lock);
4065 spin_lock_init(&adev->pcie_idx_lock);
4066 spin_lock_init(&adev->uvd_ctx_idx_lock);
4067 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 4068 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 4069 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 4070 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 4071 spin_lock_init(&adev->mm_stats.lock);
497d7cee 4072 spin_lock_init(&adev->wb.lock);
d38ceaf9 4073
0c4e7fa5
CZ
4074 INIT_LIST_HEAD(&adev->shadow_list);
4075 mutex_init(&adev->shadow_list_lock);
4076
655ce9cb 4077 INIT_LIST_HEAD(&adev->reset_list);
4078
6492e1b0 4079 INIT_LIST_HEAD(&adev->ras_list);
4080
3e38b634
EQ
4081 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
4082
beff74bc
AD
4083 INIT_DELAYED_WORK(&adev->delayed_init_work,
4084 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
4085 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4086 amdgpu_device_delay_enable_gfx_off);
2dc80b00 4087
d4535e2c
AG
4088 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4089
d23ee13f 4090 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
4091 adev->gfx.gfx_off_residency = 0;
4092 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 4093 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 4094
b265bdbd
EQ
4095 atomic_set(&adev->throttling_logging_enabled, 1);
4096 /*
4097 * If throttling continues, logging will be performed every minute
4098 * to avoid log flooding. "-1" is subtracted since the thermal
4099 * throttling interrupt comes every second. Thus, the total logging
 4100	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
 4101	 * for the throttling interrupt) = 60 seconds.
4102 */
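	/*
	 * Worked example (HZ is configuration-dependent): with HZ == 250 the
	 * interval below is (60 - 1) * 250 == 14750 jiffies, i.e. a 59 second
	 * ratelimit window with a burst of one message.
	 */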
4103 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4104 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4105
0fa49558
AX
4106 /* Registers mapping */
4107 /* TODO: block userspace mapping of io register */
da69c161
KW
4108 if (adev->asic_type >= CHIP_BONAIRE) {
4109 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4110 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4111 } else {
4112 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4113 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4114 }
d38ceaf9 4115
6c08e0ef
EQ
4116 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4117 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4118
d38ceaf9 4119 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 4120 if (!adev->rmmio)
d38ceaf9 4121 return -ENOMEM;
b8920e1e 4122
d38ceaf9 4123 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 4124 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 4125
436afdfa
PY
4126 /*
 4127	 * The reset domain needs to be present early, before the XGMI hive (if
 4128	 * any) is discovered and initialized, so that the reset semaphore and
 4129	 * in_gpu_reset flag can be used early during init and before calling RREG32.
4130 */
4131 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
03c6284d
MJ
4132 if (!adev->reset_domain)
4133 return -ENOMEM;
436afdfa 4134
3aa0115d
ML
4135 /* detect hw virtualization here */
4136 amdgpu_detect_virtualization(adev);
4137
04e85958
TL
4138 amdgpu_device_get_pcie_info(adev);
4139
dffa11b4
ML
4140 r = amdgpu_device_get_job_timeout_settings(adev);
4141 if (r) {
4142 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
03c6284d 4143 return r;
a190d1c7
XY
4144 }
4145
bf909454
PEPP
4146 amdgpu_device_set_mcbp(adev);
4147
d38ceaf9 4148 /* early init functions */
06ec9070 4149 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 4150 if (r)
03c6284d 4151 return r;
d38ceaf9 4152
b7cdb41e
ML
4153 /* Get rid of things like offb */
4154 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4155 if (r)
03c6284d 4156 return r;
b7cdb41e 4157
4d33e704
SK
4158 /* Enable TMZ based on IP_VERSION */
4159 amdgpu_gmc_tmz_set(adev);
4160
3e2dacca
DS
4161 if (amdgpu_sriov_vf(adev) &&
4162 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
4163 /* VF MMIO access (except mailbox range) from CPU
4164 * will be blocked during sriov runtime
4165 */
4166 adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4167
957b0787 4168 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
 4169	/* Need to get xgmi info early to decide the reset behavior */
4170 if (adev->gmc.xgmi.supported) {
4171 r = adev->gfxhub.funcs->get_xgmi_info(adev);
4172 if (r)
03c6284d 4173 return r;
4a0165f0
VS
4174 }
4175
8e6d0b69 4176 /* enable PCIE atomic ops */
b4520bfd
GW
4177 if (amdgpu_sriov_vf(adev)) {
4178 if (adev->virt.fw_reserve.p_pf2vf)
4179 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4180 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4181 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
 4182		/* APUs with gfx9 onwards don't rely on PCIe atomics; their internal
 4183		 * path natively supports atomics, so set have_atomics_support to true.
4184 */
b4520bfd 4185 } else if ((adev->flags & AMD_IS_APU) &&
4e8303cf
LL
4186 (amdgpu_ip_version(adev, GC_HWIP, 0) >
4187 IP_VERSION(9, 0, 0))) {
0e768043 4188 adev->have_atomics_support = true;
b4520bfd 4189 } else {
8e6d0b69 4190 adev->have_atomics_support =
4191 !pci_enable_atomic_ops_to_root(adev->pdev,
4192 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4193 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
4194 }
4195
8e6d0b69 4196 if (!adev->have_atomics_support)
4197 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4198
6585661d 4199 /* doorbell bar mapping and doorbell index init*/
43c064db 4200 amdgpu_doorbell_init(adev);
6585661d 4201
9475a943
SL
4202 if (amdgpu_emu_mode == 1) {
4203 /* post the asic on emulation mode */
4204 emu_soc_asic_init(adev);
bfca0289 4205 goto fence_driver_init;
9475a943 4206 }
bfca0289 4207
04442bf7
LL
4208 amdgpu_reset_init(adev);
4209
4e99a44e 4210 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
4211 if (adev->bios)
4212 amdgpu_device_detect_sriov_bios(adev);
048765ad 4213
95e8e59e
AD
4214 /* check if we need to reset the asic
4215 * E.g., driver was not cleanly unloaded previously, etc.
4216 */
f14899fd 4217 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 4218 if (adev->gmc.xgmi.num_physical_nodes) {
4219 dev_info(adev->dev, "Pending hive reset.\n");
4220 adev->gmc.xgmi.pending_reset = true;
 4221			/* Only init the blocks necessary for the SMU to handle the reset */
4222 for (i = 0; i < adev->num_ip_blocks; i++) {
4223 if (!adev->ip_blocks[i].status.valid)
4224 continue;
4225 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4226 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4227 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4228 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 4229 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 4230 adev->ip_blocks[i].version->funcs->name);
4231 adev->ip_blocks[i].status.hw = true;
4232 }
4233 }
7c1d9e10
KF
4234 } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
4235 !amdgpu_device_has_display_hardware(adev)) {
4236 r = psp_gpu_reset(adev);
e3c1b071 4237 } else {
7c1d9e10
KF
4238 tmp = amdgpu_reset_method;
4239 /* It should do a default reset when loading or reloading the driver,
4240 * regardless of the module parameter reset_method.
4241 */
4242 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4243 r = amdgpu_asic_reset(adev);
4244 amdgpu_reset_method = tmp;
4245 }
4246
4247 if (r) {
4248 dev_err(adev->dev, "asic reset on init failed\n");
4249 goto failed;
95e8e59e
AD
4250 }
4251 }
4252
d38ceaf9 4253 /* Post card if necessary */
39c640c0 4254 if (amdgpu_device_need_post(adev)) {
d38ceaf9 4255 if (!adev->bios) {
bec86378 4256 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
4257 r = -EINVAL;
4258 goto failed;
d38ceaf9 4259 }
bec86378 4260 DRM_INFO("GPU posting now...\n");
4d2997ab 4261 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
4262 if (r) {
4263 dev_err(adev->dev, "gpu post error!\n");
4264 goto failed;
4265 }
d38ceaf9
AD
4266 }
4267
9535a86a
SZ
4268 if (adev->bios) {
4269 if (adev->is_atom_fw) {
4270 /* Initialize clocks */
4271 r = amdgpu_atomfirmware_get_clock_info(adev);
4272 if (r) {
4273 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4274 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4275 goto failed;
4276 }
4277 } else {
4278 /* Initialize clocks */
4279 r = amdgpu_atombios_get_clock_info(adev);
4280 if (r) {
4281 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4282 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4283 goto failed;
4284 }
4285 /* init i2c buses */
4286 if (!amdgpu_device_has_dc_support(adev))
4287 amdgpu_atombios_i2c_init(adev);
a5bde2f9 4288 }
2c1a2784 4289 }
d38ceaf9 4290
bfca0289 4291fence_driver_init:
d38ceaf9 4292 /* Fence driver */
067f44c8 4293 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 4294 if (r) {
067f44c8 4295 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 4296 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 4297 goto failed;
2c1a2784 4298 }
d38ceaf9
AD
4299
4300 /* init the mode config */
4a580877 4301 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 4302
06ec9070 4303 r = amdgpu_device_ip_init(adev);
d38ceaf9 4304 if (r) {
06ec9070 4305 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 4306 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 4307 goto release_ras_con;
d38ceaf9
AD
4308 }
4309
8d35a259
LG
4310 amdgpu_fence_driver_hw_init(adev);
4311
d69b8971
YZ
4312 dev_info(adev->dev,
4313 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
4314 adev->gfx.config.max_shader_engines,
4315 adev->gfx.config.max_sh_per_se,
4316 adev->gfx.config.max_cu_per_sh,
4317 adev->gfx.cu_info.number);
4318
d38ceaf9
AD
4319 adev->accel_working = true;
4320
e59c0205
AX
4321 amdgpu_vm_check_compute_bug(adev);
4322
95844d20
MO
4323 /* Initialize the buffer migration limit. */
4324 if (amdgpu_moverate >= 0)
4325 max_MBps = amdgpu_moverate;
4326 else
4327 max_MBps = 8; /* Allow 8 MB/s. */
4328 /* Get a log2 for easy divisions. */
4329 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4330
b0adca4d
EQ
4331 /*
4332 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 4333	 * Otherwise the mgpu fan boost feature will be skipped because the
 4334	 * gpu instance count would be too low.
4335 */
4336 amdgpu_register_gpu_instance(adev);
4337
d38ceaf9
AD
4338 /* enable clockgating, etc. after ib tests, etc. since some blocks require
4339 * explicit gating rather than handling it automatically.
4340 */
e3c1b071 4341 if (!adev->gmc.xgmi.pending_reset) {
4342 r = amdgpu_device_ip_late_init(adev);
4343 if (r) {
4344 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4345 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 4346 goto release_ras_con;
e3c1b071 4347 }
4348 /* must succeed. */
4349 amdgpu_ras_resume(adev);
4350 queue_delayed_work(system_wq, &adev->delayed_init_work,
4351 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 4352 }
d38ceaf9 4353
38eecbe0
CL
4354 if (amdgpu_sriov_vf(adev)) {
4355 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4356 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4357 }
2c738637 4358
90bcb9b5
EQ
4359 /*
 4360	 * Register these sysfs interfaces after `late_init`, as some of the
 4361	 * operations performed in `late_init` might affect how the sysfs
 4362	 * interfaces are created.
4363 */
4364 r = amdgpu_atombios_sysfs_init(adev);
4365 if (r)
4366 drm_err(&adev->ddev,
4367 "registering atombios sysfs failed (%d).\n", r);
4368
4369 r = amdgpu_pm_sysfs_init(adev);
4370 if (r)
4371 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4372
4373 r = amdgpu_ucode_sysfs_init(adev);
4374 if (r) {
4375 adev->ucode_sysfs_en = false;
4376 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4377 } else
4378 adev->ucode_sysfs_en = true;
4379
77f3a5cd 4380 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4381 if (r)
77f3a5cd 4382 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4383
76da73f0
LL
4384 r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4385 if (r)
4386 dev_err(adev->dev,
4387 "Could not create amdgpu board attributes\n");
4388
7957ec80 4389 amdgpu_fru_sysfs_init(adev);
af39e6f4 4390 amdgpu_reg_state_sysfs_init(adev);
7957ec80 4391
d155bef0
AB
4392 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4393 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4394 if (r)
4395 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4396
c1dd4aa6
AG
4397 /* Have stored pci confspace at hand for restore in sudden PCI error */
4398 if (amdgpu_device_cache_pci_state(adev->pdev))
4399 pci_restore_state(pdev);
4400
8c3dd61c
KHF
4401 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4402 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
4403 * ignore it
4404 */
8c3dd61c 4405 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4406 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4407
d37a3929
OC
4408 px = amdgpu_device_supports_px(ddev);
4409
7b1c6263 4410 if (px || (!dev_is_removable(&adev->pdev->dev) &&
d37a3929 4411 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4412 vga_switcheroo_register_client(adev->pdev,
4413 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4414
4415 if (px)
8c3dd61c 4416 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4417
e3c1b071 4418 if (adev->gmc.xgmi.pending_reset)
4419 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4420 msecs_to_jiffies(AMDGPU_RESUME_MS));
4421
4a74c38c
PY
4422 amdgpu_device_check_iommu_direct_map(adev);
4423
d38ceaf9 4424 return 0;
83ba126a 4425
970fd197 4426release_ras_con:
38eecbe0
CL
4427 if (amdgpu_sriov_vf(adev))
4428 amdgpu_virt_release_full_gpu(adev, true);
4429
4430 /* failed in exclusive mode due to timeout */
4431 if (amdgpu_sriov_vf(adev) &&
4432 !amdgpu_sriov_runtime(adev) &&
4433 amdgpu_virt_mmio_blocked(adev) &&
4434 !amdgpu_virt_wait_reset(adev)) {
4435 dev_err(adev->dev, "VF exclusive mode timeout\n");
4436 /* Don't send request since VF is inactive. */
4437 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4438 adev->virt.ops = NULL;
4439 r = -EAGAIN;
4440 }
970fd197
SY
4441 amdgpu_release_ras_context(adev);
4442
83ba126a 4443failed:
89041940 4444 amdgpu_vf_error_trans_all(adev);
8840a387 4445
83ba126a 4446 return r;
d38ceaf9
AD
4447}
4448
07775fc1
AG
4449static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4450{
62d5f9f7 4451
07775fc1
AG
4452 /* Clear all CPU mappings pointing to this device */
4453 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4454
4455 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 4456 amdgpu_doorbell_fini(adev);
07775fc1
AG
4457
4458 iounmap(adev->rmmio);
4459 adev->rmmio = NULL;
4460 if (adev->mman.aper_base_kaddr)
4461 iounmap(adev->mman.aper_base_kaddr);
4462 adev->mman.aper_base_kaddr = NULL;
4463
4464 /* Memory manager related */
a0ba1279 4465 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4466 arch_phys_wc_del(adev->gmc.vram_mtrr);
4467 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4468 }
4469}
4470
d38ceaf9 4471/**
bbe04dec 4472 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4473 *
4474 * @adev: amdgpu_device pointer
4475 *
4476 * Tear down the driver info (all asics).
4477 * Called at driver shutdown.
4478 */
72c8c97b 4479void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4480{
aac89168 4481 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4482 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4483 adev->shutdown = true;
9f875167 4484
752c683d
ML
 4485	/* make sure the IB tests have finished before entering exclusive mode
 4486	 * to avoid preempting an IB test
b8920e1e 4487 */
519b8b76 4488 if (amdgpu_sriov_vf(adev)) {
752c683d 4489 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4490 amdgpu_virt_fini_data_exchange(adev);
4491 }
752c683d 4492
e5b03032
ML
4493 /* disable all interrupts */
4494 amdgpu_irq_disable_all(adev);
47fc644f 4495 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4496 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4497 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4498 else
4a580877 4499 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4500 }
8d35a259 4501 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4502
cd3a8a59 4503 if (adev->mman.initialized)
9bff18d1 4504 drain_workqueue(adev->mman.bdev.wq);
98f56188 4505
53e9d836 4506 if (adev->pm.sysfs_initialized)
7c868b59 4507 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4508 if (adev->ucode_sysfs_en)
4509 amdgpu_ucode_sysfs_fini(adev);
4510 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4511 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4512
af39e6f4
LL
4513 amdgpu_reg_state_sysfs_fini(adev);
4514
232d1d43
SY
4515 /* disable ras feature must before hw fini */
4516 amdgpu_ras_pre_fini(adev);
4517
b7043800
AD
4518 amdgpu_ttm_set_buffer_funcs_status(adev, false);
4519
e9669fb7 4520 amdgpu_device_ip_fini_early(adev);
d10d0daa 4521
a3848df6
YW
4522 amdgpu_irq_fini_hw(adev);
4523
b6fd6e0f
SK
4524 if (adev->mman.initialized)
4525 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4526
d10d0daa 4527 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4528
39934d3e
VP
4529 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4530 amdgpu_device_unmap_mmio(adev);
87172e89 4531
72c8c97b
AG
4532}
4533
4534void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4535{
62d5f9f7 4536 int idx;
d37a3929 4537 bool px;
62d5f9f7 4538
8d35a259 4539 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4540 amdgpu_device_ip_fini(adev);
b31d3063 4541 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4542 adev->accel_working = false;
68ce8b24 4543 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4544
4545 amdgpu_reset_fini(adev);
4546
d38ceaf9 4547 /* free i2c buses */
4562236b
HW
4548 if (!amdgpu_device_has_dc_support(adev))
4549 amdgpu_i2c_fini(adev);
bfca0289
SL
4550
4551 if (amdgpu_emu_mode != 1)
4552 amdgpu_atombios_fini(adev);
4553
d38ceaf9
AD
4554 kfree(adev->bios);
4555 adev->bios = NULL;
d37a3929 4556
8a2b5139
LL
4557 kfree(adev->fru_info);
4558 adev->fru_info = NULL;
4559
d37a3929
OC
4560 px = amdgpu_device_supports_px(adev_to_drm(adev));
4561
7b1c6263 4562 if (px || (!dev_is_removable(&adev->pdev->dev) &&
d37a3929 4563 apple_gmux_detect(NULL, NULL)))
84c8b22e 4564 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4565
4566 if (px)
83ba126a 4567 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4568
38d6be81 4569 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4570 vga_client_unregister(adev->pdev);
e9bc1bf7 4571
62d5f9f7
LS
4572 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4573
4574 iounmap(adev->rmmio);
4575 adev->rmmio = NULL;
43c064db 4576 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4577 drm_dev_exit(idx);
4578 }
4579
d155bef0
AB
4580 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4581 amdgpu_pmu_fini(adev);
72de33f8 4582 if (adev->mman.discovery_bin)
a190d1c7 4583 amdgpu_discovery_fini(adev);
72c8c97b 4584
cfbb6b00
AG
4585 amdgpu_reset_put_reset_domain(adev->reset_domain);
4586 adev->reset_domain = NULL;
4587
72c8c97b
AG
4588 kfree(adev->pci_state);
4589
d38ceaf9
AD
4590}
4591
58144d28
ND
4592/**
4593 * amdgpu_device_evict_resources - evict device resources
4594 * @adev: amdgpu device object
4595 *
 4596 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4597 * of the vram memory type. Mainly used for evicting device resources
4598 * at suspend time.
4599 *
4600 */
7863c155 4601static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4602{
7863c155
ML
4603 int ret;
4604
e53d9665
ML
4605 /* No need to evict vram on APUs for suspend to ram or s2idle */
4606 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4607 return 0;
58144d28 4608
7863c155
ML
4609 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4610 if (ret)
58144d28 4611 DRM_WARN("evicting device resources failed\n");
7863c155 4612 return ret;
58144d28 4613}
d38ceaf9
AD
4614
4615/*
4616 * Suspend & resume.
4617 */
5095d541
ML
4618/**
4619 * amdgpu_device_prepare - prepare for device suspend
4620 *
4621 * @dev: drm dev pointer
4622 *
4623 * Prepare to put the hw in the suspend state (all asics).
4624 * Returns 0 for success or an error on failure.
4625 * Called at driver suspend.
4626 */
4627int amdgpu_device_prepare(struct drm_device *dev)
4628{
4629 struct amdgpu_device *adev = drm_to_adev(dev);
cb11ca32 4630 int i, r;
5095d541 4631
226db360
ML
4632 amdgpu_choose_low_power_state(adev);
4633
5095d541
ML
4634 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4635 return 0;
4636
4637 /* Evict the majority of BOs before starting suspend sequence */
4638 r = amdgpu_device_evict_resources(adev);
4639 if (r)
226db360 4640 goto unprepare;
5095d541 4641
0355b24b
ML
4642 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4643
cb11ca32
ML
4644 for (i = 0; i < adev->num_ip_blocks; i++) {
4645 if (!adev->ip_blocks[i].status.valid)
4646 continue;
4647 if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4648 continue;
4649 r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4650 if (r)
226db360 4651 goto unprepare;
cb11ca32
ML
4652 }
4653
5095d541 4654 return 0;
226db360
ML
4655
4656unprepare:
4657 adev->in_s0ix = adev->in_s3 = false;
4658
4659 return r;
5095d541
ML
4660}
4661
d38ceaf9 4662/**
810ddc3a 4663 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4664 *
87e3f136 4665 * @dev: drm dev pointer
87e3f136 4666 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4667 *
4668 * Puts the hw in the suspend state (all asics).
4669 * Returns 0 for success or an error on failure.
4670 * Called at driver suspend.
4671 */
de185019 4672int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4673{
a2e15b0e 4674 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4675 int r = 0;
d38ceaf9 4676
d38ceaf9
AD
4677 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4678 return 0;
4679
44779b43 4680 adev->in_suspend = true;
3fa8f89d 4681
d7274ec7
BZ
4682 if (amdgpu_sriov_vf(adev)) {
4683 amdgpu_virt_fini_data_exchange(adev);
4684 r = amdgpu_virt_request_full_gpu(adev, false);
4685 if (r)
4686 return r;
4687 }
4688
3fa8f89d
S
4689 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4690 DRM_WARN("smart shift update failed\n");
4691
5f818173 4692 if (fbcon)
087451f3 4693 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4694
beff74bc 4695 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 4696
5e6932fe 4697 amdgpu_ras_suspend(adev);
4698
2196927b 4699 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4700
c004d44e 4701 if (!adev->in_s0ix)
5d3a2d95 4702 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4703
7863c155
ML
4704 r = amdgpu_device_evict_resources(adev);
4705 if (r)
4706 return r;
d38ceaf9 4707
dab96d8b
AD
4708 amdgpu_ttm_set_buffer_funcs_status(adev, false);
4709
8d35a259 4710 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4711
2196927b 4712 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4713
d7274ec7
BZ
4714 if (amdgpu_sriov_vf(adev))
4715 amdgpu_virt_release_full_gpu(adev, false);
4716
2e9b1523
PY
4717 r = amdgpu_dpm_notify_rlc_state(adev, false);
4718 if (r)
4719 return r;
4720
d38ceaf9
AD
4721 return 0;
4722}
4723
4724/**
810ddc3a 4725 * amdgpu_device_resume - initiate device resume
d38ceaf9 4726 *
87e3f136 4727 * @dev: drm dev pointer
87e3f136 4728 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4729 *
4730 * Bring the hw back to operating state (all asics).
4731 * Returns 0 for success or an error on failure.
4732 * Called at driver resume.
4733 */
de185019 4734int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4735{
1348969a 4736 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4737 int r = 0;
d38ceaf9 4738
d7274ec7
BZ
4739 if (amdgpu_sriov_vf(adev)) {
4740 r = amdgpu_virt_request_full_gpu(adev, true);
4741 if (r)
4742 return r;
4743 }
4744
d38ceaf9
AD
4745 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4746 return 0;
4747
62498733 4748 if (adev->in_s0ix)
bc143d8b 4749 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4750
d38ceaf9 4751 /* post card */
39c640c0 4752 if (amdgpu_device_need_post(adev)) {
4d2997ab 4753 r = amdgpu_device_asic_init(adev);
74b0b157 4754 if (r)
aac89168 4755 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4756 }
d38ceaf9 4757
06ec9070 4758 r = amdgpu_device_ip_resume(adev);
d7274ec7 4759
e6707218 4760 if (r) {
aac89168 4761 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4762 goto exit;
e6707218 4763 }
8d35a259 4764 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4765
c004d44e 4766 if (!adev->in_s0ix) {
5d3a2d95
AD
4767 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4768 if (r)
3c22c1ea 4769 goto exit;
5d3a2d95 4770 }
756e6880 4771
8ed79c40
TH
4772 r = amdgpu_device_ip_late_init(adev);
4773 if (r)
4774 goto exit;
4775
4776 queue_delayed_work(system_wq, &adev->delayed_init_work,
4777 msecs_to_jiffies(AMDGPU_RESUME_MS));
3c22c1ea
SF
4778exit:
4779 if (amdgpu_sriov_vf(adev)) {
4780 amdgpu_virt_init_data_exchange(adev);
4781 amdgpu_virt_release_full_gpu(adev, true);
4782 }
4783
4784 if (r)
4785 return r;
4786
96a5d8d4 4787 /* Make sure IB tests flushed */
beff74bc 4788 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4789
a2e15b0e 4790 if (fbcon)
087451f3 4791 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4792
5e6932fe 4793 amdgpu_ras_resume(adev);
4794
d09ef243
AD
4795 if (adev->mode_info.num_crtc) {
4796 /*
4797 * Most of the connector probing functions try to acquire runtime pm
4798 * refs to ensure that the GPU is powered on when connector polling is
4799 * performed. Since we're calling this from a runtime PM callback,
4800 * trying to acquire rpm refs will cause us to deadlock.
4801 *
4802 * Since we're guaranteed to be holding the rpm lock, it's safe to
4803 * temporarily disable the rpm helpers so this doesn't deadlock us.
4804 */
23a1a9e5 4805#ifdef CONFIG_PM
d09ef243 4806 dev->dev->power.disable_depth++;
23a1a9e5 4807#endif
d09ef243
AD
4808 if (!adev->dc_enabled)
4809 drm_helper_hpd_irq_event(dev);
4810 else
4811 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4812#ifdef CONFIG_PM
d09ef243 4813 dev->dev->power.disable_depth--;
23a1a9e5 4814#endif
d09ef243 4815 }
44779b43
RZ
4816 adev->in_suspend = false;
4817
dc907c9d
JX
4818 if (adev->enable_mes)
4819 amdgpu_mes_self_test(adev);
4820
3fa8f89d
S
4821 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4822 DRM_WARN("smart shift update failed\n");
4823
4d3b9ae5 4824 return 0;
d38ceaf9
AD
4825}
4826
e3ecdffa
AD
4827/**
4828 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4829 *
4830 * @adev: amdgpu_device pointer
4831 *
4832 * The list of all the hardware IPs that make up the asic is walked and
4833 * the check_soft_reset callbacks are run. check_soft_reset determines
4834 * if the asic is still hung or not.
4835 * Returns true if any of the IPs are still in a hung state, false if not.
4836 */
06ec9070 4837static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4838{
4839 int i;
4840 bool asic_hang = false;
4841
f993d628
ML
4842 if (amdgpu_sriov_vf(adev))
4843 return true;
4844
8bc04c29
AD
4845 if (amdgpu_asic_need_full_reset(adev))
4846 return true;
4847
63fbf42f 4848 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4849 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4850 continue;
a1255107
AD
4851 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4852 adev->ip_blocks[i].status.hang =
4853 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4854 if (adev->ip_blocks[i].status.hang) {
aac89168 4855 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4856 asic_hang = true;
4857 }
4858 }
4859 return asic_hang;
4860}
4861
e3ecdffa
AD
4862/**
4863 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4864 *
4865 * @adev: amdgpu_device pointer
4866 *
4867 * The list of all the hardware IPs that make up the asic is walked and the
4868 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4869 * handles any IP specific hardware or software state changes that are
4870 * necessary for a soft reset to succeed.
4871 * Returns 0 on success, negative error code on failure.
4872 */
06ec9070 4873static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4874{
4875 int i, r = 0;
4876
4877 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4878 if (!adev->ip_blocks[i].status.valid)
d31a501e 4879 continue;
a1255107
AD
4880 if (adev->ip_blocks[i].status.hang &&
4881 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4882 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4883 if (r)
4884 return r;
4885 }
4886 }
4887
4888 return 0;
4889}
4890
e3ecdffa
AD
4891/**
4892 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4893 *
4894 * @adev: amdgpu_device pointer
4895 *
4896 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4897 * reset is necessary to recover.
4898 * Returns true if a full asic reset is required, false if not.
4899 */
06ec9070 4900static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4901{
da146d3b
AD
4902 int i;
4903
8bc04c29
AD
4904 if (amdgpu_asic_need_full_reset(adev))
4905 return true;
4906
da146d3b 4907 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4908 if (!adev->ip_blocks[i].status.valid)
da146d3b 4909 continue;
a1255107
AD
4910 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4911 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4912 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4913 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4914 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4915 if (adev->ip_blocks[i].status.hang) {
aac89168 4916 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4917 return true;
4918 }
4919 }
35d782fe
CZ
4920 }
4921 return false;
4922}
4923
e3ecdffa
AD
4924/**
4925 * amdgpu_device_ip_soft_reset - do a soft reset
4926 *
4927 * @adev: amdgpu_device pointer
4928 *
4929 * The list of all the hardware IPs that make up the asic is walked and the
4930 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4931 * IP specific hardware or software state changes that are necessary to soft
4932 * reset the IP.
4933 * Returns 0 on success, negative error code on failure.
4934 */
06ec9070 4935static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4936{
4937 int i, r = 0;
4938
4939 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4940 if (!adev->ip_blocks[i].status.valid)
35d782fe 4941 continue;
a1255107
AD
4942 if (adev->ip_blocks[i].status.hang &&
4943 adev->ip_blocks[i].version->funcs->soft_reset) {
4944 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4945 if (r)
4946 return r;
4947 }
4948 }
4949
4950 return 0;
4951}
4952
e3ecdffa
AD
4953/**
4954 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4955 *
4956 * @adev: amdgpu_device pointer
4957 *
4958 * The list of all the hardware IPs that make up the asic is walked and the
4959 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4960 * handles any IP specific hardware or software state changes that are
4961 * necessary after the IP has been soft reset.
4962 * Returns 0 on success, negative error code on failure.
4963 */
06ec9070 4964static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4965{
4966 int i, r = 0;
4967
4968 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4969 if (!adev->ip_blocks[i].status.valid)
35d782fe 4970 continue;
a1255107
AD
4971 if (adev->ip_blocks[i].status.hang &&
4972 adev->ip_blocks[i].version->funcs->post_soft_reset)
4973 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4974 if (r)
4975 return r;
4976 }
4977
4978 return 0;
4979}
4980
e3ecdffa 4981/**
c33adbc7 4982 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4983 *
4984 * @adev: amdgpu_device pointer
4985 *
4986 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4987 * restore things like GPUVM page tables after a GPU reset where
4988 * the contents of VRAM might be lost.
403009bf
CK
4989 *
4990 * Returns:
4991 * 0 on success, negative error code on failure.
e3ecdffa 4992 */
c33adbc7 4993static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4994{
c41d1cf6 4995 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4996 struct amdgpu_bo *shadow;
e18aaea7 4997 struct amdgpu_bo_vm *vmbo;
403009bf 4998 long r = 1, tmo;
c41d1cf6
ML
4999
5000 if (amdgpu_sriov_runtime(adev))
b045d3af 5001 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
5002 else
5003 tmo = msecs_to_jiffies(100);
5004
aac89168 5005 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 5006 mutex_lock(&adev->shadow_list_lock);
e18aaea7 5007 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
5008 /* If vm is compute context or adev is APU, shadow will be NULL */
5009 if (!vmbo->shadow)
5010 continue;
5011 shadow = vmbo->shadow;
5012
403009bf 5013 /* No need to recover an evicted BO */
d3116756
CK
5014 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
5015 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
5016 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
5017 continue;
5018
5019 r = amdgpu_bo_restore_shadow(shadow, &next);
5020 if (r)
5021 break;
5022
c41d1cf6 5023 if (fence) {
1712fb1a 5024 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
5025 dma_fence_put(fence);
5026 fence = next;
1712fb1a 5027 if (tmo == 0) {
5028 r = -ETIMEDOUT;
c41d1cf6 5029 break;
1712fb1a 5030 } else if (tmo < 0) {
5031 r = tmo;
5032 break;
5033 }
403009bf
CK
5034 } else {
5035 fence = next;
c41d1cf6 5036 }
c41d1cf6
ML
5037 }
5038 mutex_unlock(&adev->shadow_list_lock);
5039
403009bf
CK
5040 if (fence)
5041 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
5042 dma_fence_put(fence);
5043
1712fb1a 5044 if (r < 0 || tmo <= 0) {
aac89168 5045 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
5046 return -EIO;
5047 }
c41d1cf6 5048
aac89168 5049 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 5050 return 0;
c41d1cf6
ML
5051}
5052
a90ad3c2 5053
e3ecdffa 5054/**
06ec9070 5055 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 5056 *
982a820b 5057 * @adev: amdgpu_device pointer
87e3f136 5058 * @from_hypervisor: request from hypervisor
5740682e
ML
5059 *
5060 * Do VF FLR and reinitialize the ASIC.
5061 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
5062 */
5063static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
5064 bool from_hypervisor)
5740682e
ML
5065{
5066 int r;
a5f67c93 5067 struct amdgpu_hive_info *hive = NULL;
7258fa31 5068 int retry_limit = 0;
5740682e 5069
7258fa31 5070retry:
c004d44e 5071 amdgpu_amdkfd_pre_reset(adev);
428890a3 5072
ab66c832
ZL
5073 amdgpu_device_stop_pending_resets(adev);
5074
5740682e
ML
5075 if (from_hypervisor)
5076 r = amdgpu_virt_request_full_gpu(adev, true);
5077 else
5078 r = amdgpu_virt_reset_gpu(adev);
5079 if (r)
5080 return r;
d1999b40 5081 amdgpu_ras_set_fed(adev, false);
f734b213 5082 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 5083
83f24a8f
HC
5084 /* some sw clean up VF needs to do before recover */
5085 amdgpu_virt_post_reset(adev);
5086
a90ad3c2 5087 /* Resume IP prior to SMC */
06ec9070 5088 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
5089 if (r)
5090 goto error;
a90ad3c2 5091
c9ffa427 5092 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 5093
7a3e0bb2
RZ
5094 r = amdgpu_device_fw_loading(adev);
5095 if (r)
5096 return r;
5097
a90ad3c2 5098 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 5099 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
5100 if (r)
5101 goto error;
a90ad3c2 5102
a5f67c93
ZL
5103 hive = amdgpu_get_xgmi_hive(adev);
5104 /* Update PSP FW topology after reset */
5105 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5106 r = amdgpu_xgmi_update_topology(hive, adev);
5107
5108 if (hive)
5109 amdgpu_put_xgmi_hive(hive);
5110
5111 if (!r) {
a5f67c93 5112 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 5113
c004d44e 5114 amdgpu_amdkfd_post_reset(adev);
a5f67c93 5115 }
a90ad3c2 5116
abc34253 5117error:
c41d1cf6 5118 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 5119 amdgpu_inc_vram_lost(adev);
c33adbc7 5120 r = amdgpu_device_recover_vram(adev);
a90ad3c2 5121 }
437f3e0b 5122 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 5123
7258fa31
SK
5124 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
5125 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
5126 retry_limit++;
5127 goto retry;
5128 } else
5129 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
5130 }
5131
a90ad3c2
ML
5132 return r;
5133}
5134
9a1cddd6 5135/**
5136 * amdgpu_device_has_job_running - check if there is any job in the pending list
5137 *
982a820b 5138 * @adev: amdgpu_device pointer
9a1cddd6 5139 *
5140 * check if there is any job in the pending list
5141 */
5142bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5143{
5144 int i;
5145 struct drm_sched_job *job;
5146
5147 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5148 struct amdgpu_ring *ring = adev->rings[i];
5149
9749c868 5150 if (!amdgpu_ring_sched_ready(ring))
9a1cddd6 5151 continue;
5152
5153 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
5154 job = list_first_entry_or_null(&ring->sched.pending_list,
5155 struct drm_sched_job, list);
9a1cddd6 5156 spin_unlock(&ring->sched.job_list_lock);
5157 if (job)
5158 return true;
5159 }
5160 return false;
5161}
5162
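/*
 * Minimal illustrative sketch (hypothetical helper, not part of the driver):
 * a typical use of amdgpu_device_has_job_running() is to refuse entering a
 * low-power state while any scheduler still has work queued.
 */
static int example_try_enter_idle(struct amdgpu_device *adev)
{
	/* Don't power down while a job is still sitting on a ring. */
	if (amdgpu_device_has_job_running(adev))
		return -EBUSY;

	/* ... proceed with the idle/suspend sequence ... */
	return 0;
}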
12938fad
CK
5163/**
5164 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5165 *
982a820b 5166 * @adev: amdgpu_device pointer
12938fad
CK
5167 *
5168 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5169 * a hung GPU.
5170 */
5171bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5172{
12938fad 5173
3ba7b418
AG
5174 if (amdgpu_gpu_recovery == 0)
5175 goto disabled;
5176
1a11a65d
YC
5177 /* Skip soft reset check in fatal error mode */
5178 if (!amdgpu_ras_is_poison_mode_supported(adev))
5179 return true;
5180
3ba7b418
AG
5181 if (amdgpu_sriov_vf(adev))
5182 return true;
5183
5184 if (amdgpu_gpu_recovery == -1) {
5185 switch (adev->asic_type) {
b3523c45
AD
5186#ifdef CONFIG_DRM_AMDGPU_SI
5187 case CHIP_VERDE:
5188 case CHIP_TAHITI:
5189 case CHIP_PITCAIRN:
5190 case CHIP_OLAND:
5191 case CHIP_HAINAN:
5192#endif
5193#ifdef CONFIG_DRM_AMDGPU_CIK
5194 case CHIP_KAVERI:
5195 case CHIP_KABINI:
5196 case CHIP_MULLINS:
5197#endif
5198 case CHIP_CARRIZO:
5199 case CHIP_STONEY:
5200 case CHIP_CYAN_SKILLFISH:
3ba7b418 5201 goto disabled;
b3523c45
AD
5202 default:
5203 break;
3ba7b418 5204 }
12938fad
CK
5205 }
5206
5207 return true;
3ba7b418
AG
5208
5209disabled:
aac89168 5210 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 5211 return false;
12938fad
CK
5212}
5213
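/*
 * Minimal illustrative sketch (hypothetical helper, not a real hang handler):
 * callers are expected to consult amdgpu_device_should_recover_gpu() before
 * arming a reset, so the amdgpu_gpu_recovery module parameter and the
 * SR-IOV/ASIC exceptions above are honoured.
 */
static bool example_recovery_allowed(struct amdgpu_device *adev)
{
	if (!amdgpu_device_should_recover_gpu(adev)) {
		dev_warn(adev->dev,
			 "hang detected but GPU recovery is disabled\n");
		return false;
	}
	return true;
}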
5c03e584
FX
5214int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5215{
47fc644f
SS
5216 u32 i;
5217 int ret = 0;
5c03e584 5218
47fc644f 5219 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 5220
47fc644f 5221 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 5222
47fc644f
SS
5223 /* disable BM */
5224 pci_clear_master(adev->pdev);
5c03e584 5225
47fc644f 5226 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 5227
47fc644f
SS
5228 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5229 dev_info(adev->dev, "GPU smu mode1 reset\n");
5230 ret = amdgpu_dpm_mode1_reset(adev);
5231 } else {
5232 dev_info(adev->dev, "GPU psp mode1 reset\n");
5233 ret = psp_gpu_reset(adev);
5234 }
5c03e584 5235
47fc644f 5236 if (ret)
7d442437 5237 goto mode1_reset_failed;
5c03e584 5238
47fc644f 5239 amdgpu_device_load_pci_state(adev->pdev);
7656168a
LL
5240 ret = amdgpu_psp_wait_for_bootloader(adev);
5241 if (ret)
7d442437 5242 goto mode1_reset_failed;
5c03e584 5243
47fc644f
SS
5244 /* wait for asic to come out of reset */
5245 for (i = 0; i < adev->usec_timeout; i++) {
5246 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 5247
47fc644f
SS
5248 if (memsize != 0xffffffff)
5249 break;
5250 udelay(1);
5251 }
5c03e584 5252
7d442437
HZ
5253 if (i >= adev->usec_timeout) {
5254 ret = -ETIMEDOUT;
5255 goto mode1_reset_failed;
5256 }
5257
47fc644f 5258 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
7656168a 5259
7d442437
HZ
5260 return 0;
5261
5262mode1_reset_failed:
5263 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 5264 return ret;
5c03e584 5265}
5c6dd71e 5266
e3c1b071 5267int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 5268 struct amdgpu_reset_context *reset_context)
26bc5340 5269{
5c1e6fa4 5270 int i, r = 0;
04442bf7
LL
5271 struct amdgpu_job *job = NULL;
5272 bool need_full_reset =
5273 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5274
5275 if (reset_context->reset_req_dev == adev)
5276 job = reset_context->job;
71182665 5277
b602ca5f
TZ
5278 if (amdgpu_sriov_vf(adev)) {
5279 /* stop the data exchange thread */
5280 amdgpu_virt_fini_data_exchange(adev);
5281 }
5282
9e225fb9
AG
5283 amdgpu_fence_driver_isr_toggle(adev, true);
5284
71182665 5285 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
5286 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5287 struct amdgpu_ring *ring = adev->rings[i];
5288
9749c868 5289 if (!amdgpu_ring_sched_ready(ring))
0875dc9e 5290 continue;
5740682e 5291
b8920e1e
SS
5292 /* Clear job fence from fence drv to avoid force_completion
5293 * leave NULL and vm flush fence in fence drv
5294 */
5c1e6fa4 5295 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 5296
2f9d4084
ML
5297 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5298 amdgpu_fence_driver_force_completion(ring);
0875dc9e 5299 }
d38ceaf9 5300
9e225fb9
AG
5301 amdgpu_fence_driver_isr_toggle(adev, false);
5302
ff99849b 5303 if (job && job->vm)
222b5f04
AG
5304 drm_sched_increase_karma(&job->base);
5305
04442bf7 5306 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 5307 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5308 if (r == -EOPNOTSUPP)
404b277b
LL
5309 r = 0;
5310 else
04442bf7
LL
5311 return r;
5312
1d721ed6 5313 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
5314 if (!amdgpu_sriov_vf(adev)) {
5315
5316 if (!need_full_reset)
5317 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5318
360cd081
LG
5319 if (!need_full_reset && amdgpu_gpu_recovery &&
5320 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
5321 amdgpu_device_ip_pre_soft_reset(adev);
5322 r = amdgpu_device_ip_soft_reset(adev);
5323 amdgpu_device_ip_post_soft_reset(adev);
5324 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 5325 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
5326 need_full_reset = true;
5327 }
5328 }
5329
5330 if (need_full_reset)
5331 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
5332 if (need_full_reset)
5333 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5334 else
5335 clear_bit(AMDGPU_NEED_FULL_RESET,
5336 &reset_context->flags);
26bc5340
AG
5337 }
5338
5339 return r;
5340}
5341
15fd09a0
SA
5342static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5343{
15fd09a0
SA
5344 int i;
5345
38a15ad9 5346 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0 5347
2d6a2a28
AA
5348 for (i = 0; i < adev->reset_info.num_regs; i++) {
5349 adev->reset_info.reset_dump_reg_value[i] =
5350 RREG32(adev->reset_info.reset_dump_reg_list[i]);
3d8785f6 5351
2d6a2a28
AA
5352 trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5353 adev->reset_info.reset_dump_reg_value[i]);
3d8785f6
SA
5354 }
5355
15fd09a0 5356 return 0;
3d8785f6 5357}
3d8785f6 5358
04442bf7
LL
5359int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5360 struct amdgpu_reset_context *reset_context)
26bc5340
AG
5361{
5362 struct amdgpu_device *tmp_adev = NULL;
04442bf7 5363 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 5364 int r = 0;
e043a35d 5365 uint32_t i;
26bc5340 5366
04442bf7
LL
5367 /* Try reset handler method first */
5368 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5369 reset_list);
ea137071 5370
2a8f7464 5371 if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
ea137071 5372 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35 5373
2a8f7464
SK
5374 /* Trigger ip dump before we reset the asic */
5375 for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5376 if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5377 tmp_adev->ip_blocks[i].version->funcs
5378 ->dump_ip_state((void *)tmp_adev);
5379 }
e043a35d 5380
0a83bb35 5381 reset_context->reset_device_list = device_list_handle;
04442bf7 5382 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 5383 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5384 if (r == -EOPNOTSUPP)
404b277b
LL
5385 r = 0;
5386 else
04442bf7
LL
5387 return r;
5388
5389 /* Reset handler not implemented, use the default method */
5390 need_full_reset =
5391 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5392 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5393
26bc5340 5394 /*
655ce9cb 5395 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
5396 * to allow proper link negotiation in FW (within 1 sec)
5397 */
7ac71382 5398 if (!skip_hw_reset && need_full_reset) {
655ce9cb 5399 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 5400 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 5401 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 5402 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 5403 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
5404 r = -EALREADY;
5405 } else
5406 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 5407
041a62bc 5408 if (r) {
aac89168 5409 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 5410 r, adev_to_drm(tmp_adev)->unique);
19349072 5411 goto out;
ce316fa5
LM
5412 }
5413 }
5414
041a62bc
AG
5415 /* For XGMI wait for all resets to complete before proceed */
5416 if (!r) {
655ce9cb 5417 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
5418 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5419 flush_work(&tmp_adev->xgmi_reset_work);
5420 r = tmp_adev->asic_reset_res;
5421 if (r)
5422 break;
ce316fa5
LM
5423 }
5424 }
5425 }
ce316fa5 5426 }
26bc5340 5427
43c4d576 5428 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 5429 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
21226f02 5430 amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
43c4d576
JC
5431 }
5432
00eaa571 5433 amdgpu_ras_intr_cleared();
43c4d576 5434 }
00eaa571 5435
655ce9cb 5436 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
5437 if (need_full_reset) {
5438 /* post card */
1b6ef74b 5439 amdgpu_ras_set_fed(tmp_adev, false);
e3c1b071 5440 r = amdgpu_device_asic_init(tmp_adev);
5441 if (r) {
aac89168 5442 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5443 } else {
26bc5340 5444 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 5445
26bc5340
AG
5446 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5447 if (r)
5448 goto out;
5449
5450 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
a7691785 5451
ea137071
AR
5452 if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5453 amdgpu_coredump(tmp_adev, vram_lost, reset_context);
a7691785 5454
26bc5340 5455 if (vram_lost) {
77e7f829 5456 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5457 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5458 }
5459
26bc5340
AG
5460 r = amdgpu_device_fw_loading(tmp_adev);
5461 if (r)
5462 return r;
5463
c45e38f2
LL
5464 r = amdgpu_xcp_restore_partition_mode(
5465 tmp_adev->xcp_mgr);
5466 if (r)
5467 goto out;
5468
26bc5340
AG
5469 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5470 if (r)
5471 goto out;
5472
b7043800
AD
5473 if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5474 amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5475
26bc5340
AG
5476 if (vram_lost)
5477 amdgpu_device_fill_reset_magic(tmp_adev);
5478
fdafb359
EQ
5479 /*
5480 * Add this ASIC as tracked as reset was already
5481 * complete successfully.
5482 */
5483 amdgpu_register_gpu_instance(tmp_adev);
5484
04442bf7
LL
5485 if (!reset_context->hive &&
5486 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5487 amdgpu_xgmi_add_device(tmp_adev);
5488
7c04ca50 5489 r = amdgpu_device_ip_late_init(tmp_adev);
5490 if (r)
5491 goto out;
5492
087451f3 5493 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5494
e8fbaf03
GC
5495 /*
5496 * The GPU enters a bad state once the number of faulty
5497 * pages detected by ECC reaches the threshold, and RAS
5498 * recovery is scheduled next. So add one check here to
5499 * break recovery if the count indeed exceeds the
5500 * bad page threshold, and remind the user to
5501 * retire this GPU or to set a bigger
5502 * bad_page_threshold value to fix this when
5503 * probing the driver again.
5504 */
11003c68 5505 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5506 /* must succeed. */
5507 amdgpu_ras_resume(tmp_adev);
5508 } else {
5509 r = -EINVAL;
5510 goto out;
5511 }
e79a04d5 5512
26bc5340 5513 /* Update PSP FW topology after reset */
04442bf7
LL
5514 if (reset_context->hive &&
5515 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5516 r = amdgpu_xgmi_update_topology(
5517 reset_context->hive, tmp_adev);
26bc5340
AG
5518 }
5519 }
5520
26bc5340
AG
5521out:
5522 if (!r) {
5523 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5524 r = amdgpu_ib_ring_tests(tmp_adev);
5525 if (r) {
5526 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5527 need_full_reset = true;
5528 r = -EAGAIN;
5529 goto end;
5530 }
5531 }
5532
5533 if (!r)
5534 r = amdgpu_device_recover_vram(tmp_adev);
5535 else
5536 tmp_adev->asic_reset_res = r;
5537 }
5538
5539end:
04442bf7
LL
5540 if (need_full_reset)
5541 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5542 else
5543 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5544 return r;
5545}
5546
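/*
 * Minimal illustrative sketch (hypothetical wrapper, not a real entry point):
 * the pre-reset and reset helpers above are normally driven with a
 * one-element device list and an amdgpu_reset_context, much like the PCI
 * slot reset path later in this file.
 */
static int example_single_device_reset(struct amdgpu_device *adev)
{
	struct amdgpu_reset_context reset_context;
	struct list_head device_list;
	int r;

	memset(&reset_context, 0, sizeof(reset_context));
	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	INIT_LIST_HEAD(&device_list);
	list_add_tail(&adev->reset_list, &device_list);

	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
	if (r)
		return r;

	return amdgpu_do_asic_reset(&device_list, &reset_context);
}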
e923be99 5547static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5548{
5740682e 5549
a3a09142
AD
5550 switch (amdgpu_asic_reset_method(adev)) {
5551 case AMD_RESET_METHOD_MODE1:
5552 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5553 break;
5554 case AMD_RESET_METHOD_MODE2:
5555 adev->mp1_state = PP_MP1_STATE_RESET;
5556 break;
5557 default:
5558 adev->mp1_state = PP_MP1_STATE_NONE;
5559 break;
5560 }
26bc5340 5561}
d38ceaf9 5562
e923be99 5563static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5564{
89041940 5565 amdgpu_vf_error_trans_all(adev);
a3a09142 5566 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5567}
5568
3f12acc8
EQ
5569static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5570{
5571 struct pci_dev *p = NULL;
5572
5573 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5574 adev->pdev->bus->number, 1);
5575 if (p) {
5576 pm_runtime_enable(&(p->dev));
5577 pm_runtime_resume(&(p->dev));
5578 }
b85e285e
YY
5579
5580 pci_dev_put(p);
3f12acc8
EQ
5581}
5582
5583static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5584{
5585 enum amd_reset_method reset_method;
5586 struct pci_dev *p = NULL;
5587 u64 expires;
5588
5589 /*
5590 * For now, only BACO and mode1 reset are confirmed
5591 * to suffer the audio issue without being properly suspended.
5592 */
5593 reset_method = amdgpu_asic_reset_method(adev);
5594 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5595 (reset_method != AMD_RESET_METHOD_MODE1))
5596 return -EINVAL;
5597
5598 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5599 adev->pdev->bus->number, 1);
5600 if (!p)
5601 return -ENODEV;
5602
5603 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5604 if (!expires)
5605 /*
5606 * If we cannot get the audio device autosuspend delay,
5607 * a fixed 4s interval is used. Since 3s is the audio
5608 * controller's default autosuspend delay setting, the
5609 * 4s used here is guaranteed to cover it.
5610 */
54b7feb9 5611 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5612
5613 while (!pm_runtime_status_suspended(&(p->dev))) {
5614 if (!pm_runtime_suspend(&(p->dev)))
5615 break;
5616
5617 if (expires < ktime_get_mono_fast_ns()) {
5618 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5619 pci_dev_put(p);
3f12acc8
EQ
5620 /* TODO: abort the succeeding gpu reset? */
5621 return -ETIMEDOUT;
5622 }
5623 }
5624
5625 pm_runtime_disable(&(p->dev));
5626
b85e285e 5627 pci_dev_put(p);
3f12acc8
EQ
5628 return 0;
5629}
5630
d193b12b 5631static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5632{
5633 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5634
5635#if defined(CONFIG_DEBUG_FS)
5636 if (!amdgpu_sriov_vf(adev))
5637 cancel_work(&adev->reset_work);
5638#endif
5639
5640 if (adev->kfd.dev)
5641 cancel_work(&adev->kfd.reset_work);
5642
5643 if (amdgpu_sriov_vf(adev))
5644 cancel_work(&adev->virt.flr_work);
5645
5646 if (con && adev->ras_enabled)
5647 cancel_work(&con->recovery_work);
5648
5649}
5650
dfe9c3cd
LL
5651static int amdgpu_device_health_check(struct list_head *device_list_handle)
5652{
5653 struct amdgpu_device *tmp_adev;
5654 int ret = 0;
5655 u32 status;
5656
5657 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5658 pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
5659 if (PCI_POSSIBLE_ERROR(status)) {
5660 dev_err(tmp_adev->dev, "device lost from bus!");
5661 ret = -ENODEV;
5662 }
5663 }
5664
5665 return ret;
5666}
5667
26bc5340 5668/**
6e9c65f7 5669 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5670 *
982a820b 5671 * @adev: amdgpu_device pointer
26bc5340 5672 * @job: which job trigger hang
80bd2de1 5673 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5674 *
5675 * Attempt to reset the GPU if it has hung (all asics).
5676 * Attempt to do a soft reset or full reset and reinitialize the ASIC.
5677 * Returns 0 for success or an error on failure.
5678 */
5679
cf727044 5680int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5681 struct amdgpu_job *job,
5682 struct amdgpu_reset_context *reset_context)
26bc5340 5683{
1d721ed6 5684 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5685 bool job_signaled = false;
26bc5340 5686 struct amdgpu_hive_info *hive = NULL;
26bc5340 5687 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5688 int i, r = 0;
bb5c7235 5689 bool need_emergency_restart = false;
3f12acc8 5690 bool audio_suspended = false;
26bc5340 5691
6e3cd2a9 5692 /*
bb5c7235
WS
5693 * Special case: RAS triggered and full reset isn't supported
5694 */
5695 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5696
d5ea093e
AG
5697 /*
5698 * Flush RAM to disk so that after reboot
5699 * the user can read the log and see why the system rebooted.
5700 */
80285ae1
SY
5701 if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5702 amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5703 DRM_WARN("Emergency reboot.");
5704
5705 ksys_sync_helper();
5706 emergency_restart();
5707 }
5708
b823821f 5709 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5710 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5711
175ac6ec
ZL
5712 if (!amdgpu_sriov_vf(adev))
5713 hive = amdgpu_get_xgmi_hive(adev);
681260df 5714 if (hive)
53b3f8f4 5715 mutex_lock(&hive->hive_lock);
26bc5340 5716
f1549c09
LG
5717 reset_context->job = job;
5718 reset_context->hive = hive;
9e94d22c
EQ
5719 /*
5720 * Build list of devices to reset.
5721 * In case we are in XGMI hive mode, resort the device list
5722 * to put adev in the 1st position.
5723 */
5724 INIT_LIST_HEAD(&device_list);
175ac6ec 5725 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5726 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5727 list_add_tail(&tmp_adev->reset_list, &device_list);
087a3e13 5728 if (adev->shutdown)
83d29a5f
YC
5729 tmp_adev->shutdown = true;
5730 }
655ce9cb 5731 if (!list_is_first(&adev->reset_list, &device_list))
5732 list_rotate_to_front(&adev->reset_list, &device_list);
5733 device_list_handle = &device_list;
26bc5340 5734 } else {
655ce9cb 5735 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5736 device_list_handle = &device_list;
5737 }
5738
dfe9c3cd
LL
5739 if (!amdgpu_sriov_vf(adev)) {
5740 r = amdgpu_device_health_check(device_list_handle);
5741 if (r)
5742 goto end_reset;
5743 }
5744
e923be99
AG
5745 /* We need to lock reset domain only once both for XGMI and single device */
5746 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5747 reset_list);
3675c2f2 5748 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5749
1d721ed6 5750 /* block all schedulers and reset given job's ring */
655ce9cb 5751 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5752
e923be99 5753 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5754
3f12acc8
EQ
5755 /*
5756 * Try to put the audio codec into suspend state
5757 * before the GPU reset starts.
5758 *
5759 * The power domain of the graphics device is
5760 * shared with the AZ power domain. Without this,
5761 * we may change the audio hardware from behind
5762 * the audio driver's back. That will trigger
5763 * some audio codec errors.
5764 */
5765 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5766 audio_suspended = true;
5767
9e94d22c
EQ
5768 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5769
52fb44cf
EQ
5770 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5771
c004d44e 5772 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5773 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5774
12ffa55d
AG
5775 /*
5776 * Mark these ASICs to be reset as untracked first,
5777 * and add them back after the reset completes.
5778 */
5779 amdgpu_unregister_gpu_instance(tmp_adev);
5780
163d4cd2 5781 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5782
f1c1314b 5783 /* disable ras on ALL IPs */
bb5c7235 5784 if (!need_emergency_restart &&
b823821f 5785 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5786 amdgpu_ras_suspend(tmp_adev);
5787
1d721ed6
AG
5788 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5789 struct amdgpu_ring *ring = tmp_adev->rings[i];
5790
9749c868 5791 if (!amdgpu_ring_sched_ready(ring))
1d721ed6
AG
5792 continue;
5793
0b2d2c2e 5794 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5795
bb5c7235 5796 if (need_emergency_restart)
7c6e68c7 5797 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5798 }
8f8c80f4 5799 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5800 }
5801
bb5c7235 5802 if (need_emergency_restart)
7c6e68c7
AG
5803 goto skip_sched_resume;
5804
1d721ed6
AG
5805 /*
5806 * Must check guilty signal here since after this point all old
5807 * HW fences are force signaled.
5808 *
5809 * job->base holds a reference to parent fence
5810 */
f6a3f660 5811 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5812 job_signaled = true;
1d721ed6
AG
5813 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5814 goto skip_hw_reset;
5815 }
5816
26bc5340 5817retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5818 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f1549c09 5819 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5820 /*TODO Should we stop ?*/
5821 if (r) {
aac89168 5822 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5823 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5824 tmp_adev->asic_reset_res = r;
5825 }
247c7b0d 5826
ab66c832
ZL
5827 if (!amdgpu_sriov_vf(tmp_adev))
5828 /*
5829 * Drop all pending non-scheduler resets. Scheduler resets
5830 * were already dropped during drm_sched_stop
5831 */
5832 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5833 }
5834
5835 /* Actual ASIC resets if needed. */
4f30d920 5836 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5837 if (amdgpu_sriov_vf(adev)) {
5838 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5839 if (r)
5840 adev->asic_reset_res = r;
950d6425 5841
28606c4e 5842 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
4e8303cf
LL
5843 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5844 IP_VERSION(9, 4, 2) ||
adb4d6a4 5845 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
4e8303cf 5846 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
950d6425 5847 amdgpu_ras_resume(adev);
26bc5340 5848 } else {
f1549c09 5849 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5850 if (r && r == -EAGAIN)
26bc5340
AG
5851 goto retry;
5852 }
5853
1d721ed6
AG
5854skip_hw_reset:
5855
26bc5340 5856 /* Post ASIC reset for all devs .*/
655ce9cb 5857 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5858
1d721ed6
AG
5859 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5860 struct amdgpu_ring *ring = tmp_adev->rings[i];
5861
9749c868 5862 if (!amdgpu_ring_sched_ready(ring))
1d721ed6
AG
5863 continue;
5864
6868a2c4 5865 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5866 }
5867
b8920e1e 5868 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5869 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5870
7258fa31
SK
5871 if (tmp_adev->asic_reset_res)
5872 r = tmp_adev->asic_reset_res;
5873
1d721ed6 5874 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5875
5876 if (r) {
5877 /* bad news, how to tell it to userspace ? */
12ffa55d 5878 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5879 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5880 } else {
12ffa55d 5881 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5882 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5883 DRM_WARN("smart shift update failed\n");
26bc5340 5884 }
7c6e68c7 5885 }
26bc5340 5886
7c6e68c7 5887skip_sched_resume:
655ce9cb 5888 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5889 /* unlock kfd: SRIOV would do it separately */
c004d44e 5890 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5891 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5892
5893 /* kfd_post_reset will do nothing if kfd device is not initialized,
5894 * need to bring up kfd here if it was not initialized before
5895 */
5896 if (!adev->kfd.init_complete)
5897 amdgpu_amdkfd_device_init(adev);
5898
3f12acc8
EQ
5899 if (audio_suspended)
5900 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5901
5902 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5903
5904 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5905 }
5906
e923be99
AG
5907 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5908 reset_list);
5909 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5910
dfe9c3cd 5911end_reset:
9e94d22c 5912 if (hive) {
9e94d22c 5913 mutex_unlock(&hive->hive_lock);
d95e8e97 5914 amdgpu_put_xgmi_hive(hive);
9e94d22c 5915 }
26bc5340 5916
f287a3c5 5917 if (r)
26bc5340 5918 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5919
5920 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5921 return r;
5922}
5923
466a7d11
ML
5924/**
5925 * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5926 *
5927 * @adev: amdgpu_device pointer
5928 * @speed: pointer to the speed of the link
5929 * @width: pointer to the width of the link
5930 *
5931 * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5932 * first physical partner to an AMD dGPU.
5933 * This will exclude any virtual switches and links.
5934 */
5935static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5936 enum pci_bus_speed *speed,
5937 enum pcie_link_width *width)
5938{
5939 struct pci_dev *parent = adev->pdev;
5940
5941 if (!speed || !width)
5942 return;
5943
5944 *speed = PCI_SPEED_UNKNOWN;
5945 *width = PCIE_LNK_WIDTH_UNKNOWN;
5946
ba46b3bd
AD
5947 if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
5948 while ((parent = pci_upstream_bridge(parent))) {
5949 /* skip upstream/downstream switches internal to dGPU*/
5950 if (parent->vendor == PCI_VENDOR_ID_ATI)
5951 continue;
5952 *speed = pcie_get_speed_cap(parent);
5953 *width = pcie_get_width_cap(parent);
5954 break;
5955 }
5956 } else {
5957 /* use the current speeds rather than max if switching is not supported */
5958 pcie_bandwidth_available(adev->pdev, NULL, speed, width);
466a7d11
ML
5959 }
5960}
5961
e3ecdffa
AD
5962/**
5963 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5964 *
5965 * @adev: amdgpu_device pointer
5966 *
5967 * Fetches and stores in the driver the PCIE capabilities (gen speed
5968 * and lanes) of the slot the device is in. Handles APUs and
5969 * virtualized environments where PCIE config space may not be available.
5970 */
5494d864 5971static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5972{
5d9a6330 5973 struct pci_dev *pdev;
c5313457
HK
5974 enum pci_bus_speed speed_cap, platform_speed_cap;
5975 enum pcie_link_width platform_link_width;
d0dd7f0c 5976
cd474ba0
AD
5977 if (amdgpu_pcie_gen_cap)
5978 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5979
cd474ba0
AD
5980 if (amdgpu_pcie_lane_cap)
5981 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5982
cd474ba0 5983 /* covers APUs as well */
04e85958 5984 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5985 if (adev->pm.pcie_gen_mask == 0)
5986 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5987 if (adev->pm.pcie_mlw_mask == 0)
5988 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5989 return;
cd474ba0 5990 }
d0dd7f0c 5991
c5313457
HK
5992 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5993 return;
5994
466a7d11
ML
5995 amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
5996 &platform_link_width);
c5313457 5997
cd474ba0 5998 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5999 /* asic caps */
6000 pdev = adev->pdev;
6001 speed_cap = pcie_get_speed_cap(pdev);
6002 if (speed_cap == PCI_SPEED_UNKNOWN) {
6003 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
6004 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6005 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 6006 } else {
2b3a1f51
FX
6007 if (speed_cap == PCIE_SPEED_32_0GT)
6008 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6009 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6010 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6011 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6012 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6013 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
6014 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6015 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6016 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6017 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6018 else if (speed_cap == PCIE_SPEED_8_0GT)
6019 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6020 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6021 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6022 else if (speed_cap == PCIE_SPEED_5_0GT)
6023 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6024 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6025 else
6026 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6027 }
6028 /* platform caps */
c5313457 6029 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
6030 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6031 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6032 } else {
2b3a1f51
FX
6033 if (platform_speed_cap == PCIE_SPEED_32_0GT)
6034 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6035 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6036 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6037 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6038 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6039 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
6040 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6041 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6042 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6043 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 6044 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
6045 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6046 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6047 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 6048 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
6049 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6050 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6051 else
6052 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6053
cd474ba0
AD
6054 }
6055 }
6056 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 6057 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
6058 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6059 } else {
c5313457 6060 switch (platform_link_width) {
5d9a6330 6061 case PCIE_LNK_X32:
cd474ba0
AD
6062 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6063 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6064 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6065 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6066 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6067 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6068 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6069 break;
5d9a6330 6070 case PCIE_LNK_X16:
cd474ba0
AD
6071 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6072 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6073 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6074 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6075 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6076 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6077 break;
5d9a6330 6078 case PCIE_LNK_X12:
cd474ba0
AD
6079 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6080 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6081 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6082 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6083 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6084 break;
5d9a6330 6085 case PCIE_LNK_X8:
cd474ba0
AD
6086 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6087 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6088 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6089 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6090 break;
5d9a6330 6091 case PCIE_LNK_X4:
cd474ba0
AD
6092 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6093 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6094 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6095 break;
5d9a6330 6096 case PCIE_LNK_X2:
cd474ba0
AD
6097 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6098 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6099 break;
5d9a6330 6100 case PCIE_LNK_X1:
cd474ba0
AD
6101 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6102 break;
6103 default:
6104 break;
6105 }
d0dd7f0c
AD
6106 }
6107 }
6108}
d38ceaf9 6109
08a2fd23
RE
6110/**
6111 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6112 *
6113 * @adev: amdgpu_device pointer
6114 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6115 *
6116 * Return true if @peer_adev can access (DMA) @adev through the PCIe
6117 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6118 * @peer_adev.
6119 */
6120bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6121 struct amdgpu_device *peer_adev)
6122{
6123#ifdef CONFIG_HSA_AMD_P2P
6124 uint64_t address_mask = peer_adev->dev->dma_mask ?
6125 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6126 resource_size_t aper_limit =
6127 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
6128 bool p2p_access =
6129 !adev->gmc.xgmi.connected_to_cpu &&
6130 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
6131
6132 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
6133 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
6134 !(adev->gmc.aper_base & address_mask ||
6135 aper_limit & address_mask));
6136#else
6137 return false;
6138#endif
6139}
6140
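/*
 * Minimal illustrative sketch (hypothetical helper): a caller deciding
 * whether to DMA directly into another GPU's VRAM gates that on the BAR
 * check above and otherwise falls back to staging through system memory.
 */
static bool example_can_use_p2p(struct amdgpu_device *local,
				struct amdgpu_device *remote)
{
	/* DMA from @local lands in @remote's VRAM through @remote's BAR. */
	if (!amdgpu_device_is_peer_accessible(remote, local))
		return false;	/* caller should bounce through GTT instead */

	return true;
}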
361dbd01
AD
6141int amdgpu_device_baco_enter(struct drm_device *dev)
6142{
1348969a 6143 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 6144 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 6145
6ab68650 6146 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
6147 return -ENOTSUPP;
6148
8ab0d6f0 6149 if (ras && adev->ras_enabled &&
acdae216 6150 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
6151 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6152
9530273e 6153 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
6154}
6155
6156int amdgpu_device_baco_exit(struct drm_device *dev)
6157{
1348969a 6158 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 6159 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 6160 int ret = 0;
361dbd01 6161
6ab68650 6162 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
6163 return -ENOTSUPP;
6164
9530273e
EQ
6165 ret = amdgpu_dpm_baco_exit(adev);
6166 if (ret)
6167 return ret;
7a22677b 6168
8ab0d6f0 6169 if (ras && adev->ras_enabled &&
acdae216 6170 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
6171 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6172
1bece222
CL
6173 if (amdgpu_passthrough(adev) &&
6174 adev->nbio.funcs->clear_doorbell_interrupt)
6175 adev->nbio.funcs->clear_doorbell_interrupt(adev);
6176
7a22677b 6177 return 0;
361dbd01 6178}
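/*
 * Minimal illustrative sketch (hypothetical wrapper): BACO entry and exit
 * are used as a pair, typically around a runtime-suspend window; only the
 * error-handling shape is shown here.
 */
static int example_baco_cycle(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;	/* e.g. -ENOTSUPP when BACO isn't supported */

	/* ... device sits in BACO until it is needed again ... */

	return amdgpu_device_baco_exit(dev);
}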
c9a6b82f
AG
6179
6180/**
6181 * amdgpu_pci_error_detected - Called when a PCI error is detected.
6182 * @pdev: PCI device struct
6183 * @state: PCI channel state
6184 *
6185 * Description: Called when a PCI error is detected.
6186 *
6187 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6188 */
6189pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6190{
6191 struct drm_device *dev = pci_get_drvdata(pdev);
6192 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 6193 int i;
c9a6b82f
AG
6194
6195 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6196
6894305c
AG
6197 if (adev->gmc.xgmi.num_physical_nodes > 1) {
6198 DRM_WARN("No support for XGMI hive yet...");
6199 return PCI_ERS_RESULT_DISCONNECT;
6200 }
6201
e17e27f9
GC
6202 adev->pci_channel_state = state;
6203
c9a6b82f
AG
6204 switch (state) {
6205 case pci_channel_io_normal:
6206 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 6207 /* Fatal error, prepare for slot reset */
8a11d283
TZ
6208 case pci_channel_io_frozen:
6209 /*
d0fb18b5 6210 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
6211 * to GPU during PCI error recovery
6212 */
3675c2f2 6213 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 6214 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
6215
6216 /*
6217 * Block any work scheduling as we do for regular GPU reset
6218 * for the duration of the recovery
6219 */
6220 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6221 struct amdgpu_ring *ring = adev->rings[i];
6222
9749c868 6223 if (!amdgpu_ring_sched_ready(ring))
acd89fca
AG
6224 continue;
6225
6226 drm_sched_stop(&ring->sched, NULL);
6227 }
8f8c80f4 6228 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
6229 return PCI_ERS_RESULT_NEED_RESET;
6230 case pci_channel_io_perm_failure:
6231 /* Permanent error, prepare for device removal */
6232 return PCI_ERS_RESULT_DISCONNECT;
6233 }
6234
6235 return PCI_ERS_RESULT_NEED_RESET;
6236}
6237
6238/**
6239 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6240 * @pdev: pointer to PCI device
6241 */
6242pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6243{
6244
6245 DRM_INFO("PCI error: mmio enabled callback!!\n");
6246
6247 /* TODO - dump whatever for debugging purposes */
6248
6249 /* This is called only if amdgpu_pci_error_detected returns
6250 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6251 * works, no need to reset slot.
6252 */
6253
6254 return PCI_ERS_RESULT_RECOVERED;
6255}
6256
6257/**
6258 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6259 * @pdev: PCI device struct
6260 *
6261 * Description: This routine is called by the pci error recovery
6262 * code after the PCI slot has been reset, just before we
6263 * should resume normal operations.
6264 */
6265pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6266{
6267 struct drm_device *dev = pci_get_drvdata(pdev);
6268 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 6269 int r, i;
04442bf7 6270 struct amdgpu_reset_context reset_context;
362c7b91 6271 u32 memsize;
7ac71382 6272 struct list_head device_list;
601429cc
SY
6273 struct amdgpu_hive_info *hive;
6274 int hive_ras_recovery = 0;
6275 struct amdgpu_ras *ras;
6276
6277 /* PCI error slot reset should be skipped during RAS recovery */
6278 hive = amdgpu_get_xgmi_hive(adev);
6279 if (hive) {
6280 hive_ras_recovery = atomic_read(&hive->ras_recovery);
6281 amdgpu_put_xgmi_hive(hive);
6282 }
6283 ras = amdgpu_ras_get_context(adev);
6284 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) &&
6285 ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
6286 return PCI_ERS_RESULT_RECOVERED;
c9a6b82f
AG
6287
6288 DRM_INFO("PCI error: slot reset callback!!\n");
6289
04442bf7
LL
6290 memset(&reset_context, 0, sizeof(reset_context));
6291
7ac71382 6292 INIT_LIST_HEAD(&device_list);
655ce9cb 6293 list_add_tail(&adev->reset_list, &device_list);
7ac71382 6294
362c7b91
AG
6295 /* wait for asic to come out of reset */
6296 msleep(500);
6297
7ac71382 6298 /* Restore PCI confspace */
c1dd4aa6 6299 amdgpu_device_load_pci_state(pdev);
c9a6b82f 6300
362c7b91
AG
6301 /* confirm ASIC came out of reset */
6302 for (i = 0; i < adev->usec_timeout; i++) {
6303 memsize = amdgpu_asic_get_config_memsize(adev);
6304
6305 if (memsize != 0xffffffff)
6306 break;
6307 udelay(1);
6308 }
6309 if (memsize == 0xffffffff) {
6310 r = -ETIME;
6311 goto out;
6312 }
6313
04442bf7
LL
6314 reset_context.method = AMD_RESET_METHOD_NONE;
6315 reset_context.reset_req_dev = adev;
6316 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6317 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6318
7afefb81 6319 adev->no_hw_access = true;
04442bf7 6320 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 6321 adev->no_hw_access = false;
c9a6b82f
AG
6322 if (r)
6323 goto out;
6324
04442bf7 6325 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
6326
6327out:
c9a6b82f 6328 if (!r) {
c1dd4aa6
AG
6329 if (amdgpu_device_cache_pci_state(adev->pdev))
6330 pci_restore_state(adev->pdev);
6331
c9a6b82f
AG
6332 DRM_INFO("PCIe error recovery succeeded\n");
6333 } else {
6334 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
6335 amdgpu_device_unset_mp1_state(adev);
6336 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
6337 }
6338
6339 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6340}
6341
6342/**
6343 * amdgpu_pci_resume() - resume normal ops after PCI reset
6344 * @pdev: pointer to PCI device
6345 *
6346 * Called when the error recovery driver tells us that it's
505199a3 6347 * OK to resume normal operation.
c9a6b82f
AG
6348 */
6349void amdgpu_pci_resume(struct pci_dev *pdev)
6350{
6351 struct drm_device *dev = pci_get_drvdata(pdev);
6352 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 6353 int i;
c9a6b82f 6354
c9a6b82f
AG
6355
6356 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 6357
e17e27f9
GC
6358 /* Only continue execution for the case of pci_channel_io_frozen */
6359 if (adev->pci_channel_state != pci_channel_io_frozen)
6360 return;
6361
acd89fca
AG
6362 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6363 struct amdgpu_ring *ring = adev->rings[i];
6364
9749c868 6365 if (!amdgpu_ring_sched_ready(ring))
acd89fca
AG
6366 continue;
6367
acd89fca
AG
6368 drm_sched_start(&ring->sched, true);
6369 }
6370
e923be99
AG
6371 amdgpu_device_unset_mp1_state(adev);
6372 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 6373}
c1dd4aa6
AG
6374
6375bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6376{
6377 struct drm_device *dev = pci_get_drvdata(pdev);
6378 struct amdgpu_device *adev = drm_to_adev(dev);
6379 int r;
6380
6381 r = pci_save_state(pdev);
6382 if (!r) {
6383 kfree(adev->pci_state);
6384
6385 adev->pci_state = pci_store_saved_state(pdev);
6386
6387 if (!adev->pci_state) {
6388 DRM_ERROR("Failed to store PCI saved state");
6389 return false;
6390 }
6391 } else {
6392 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6393 return false;
6394 }
6395
6396 return true;
6397}
6398
6399bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6400{
6401 struct drm_device *dev = pci_get_drvdata(pdev);
6402 struct amdgpu_device *adev = drm_to_adev(dev);
6403 int r;
6404
6405 if (!adev->pci_state)
6406 return false;
6407
6408 r = pci_load_saved_state(pdev, adev->pci_state);
6409
6410 if (!r) {
6411 pci_restore_state(pdev);
6412 } else {
6413 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6414 return false;
6415 }
6416
6417 return true;
6418}
6419
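/*
 * Minimal illustrative sketch (hypothetical wrapper): the two helpers above
 * are used as a pair around operations that clobber PCI config space, as the
 * mode1 reset path earlier in this file does.
 */
static int example_reset_with_pci_state(struct amdgpu_device *adev)
{
	if (!amdgpu_device_cache_pci_state(adev->pdev))
		return -EINVAL;

	/* ... perform the ASIC reset here ... */

	if (!amdgpu_device_load_pci_state(adev->pdev))
		return -EIO;

	return 0;
}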
810085dd
EH
6420void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6421 struct amdgpu_ring *ring)
6422{
6423#ifdef CONFIG_X86_64
b818a5d3 6424 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6425 return;
6426#endif
6427 if (adev->gmc.xgmi.connected_to_cpu)
6428 return;
6429
6430 if (ring && ring->funcs->emit_hdp_flush)
6431 amdgpu_ring_emit_hdp_flush(ring);
6432 else
6433 amdgpu_asic_flush_hdp(adev, ring);
6434}
c1dd4aa6 6435
810085dd
EH
6436void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6437 struct amdgpu_ring *ring)
6438{
6439#ifdef CONFIG_X86_64
b818a5d3 6440 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6441 return;
6442#endif
6443 if (adev->gmc.xgmi.connected_to_cpu)
6444 return;
c1dd4aa6 6445
810085dd
EH
6446 amdgpu_asic_invalidate_hdp(adev, ring);
6447}
34f3a4a9 6448
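/*
 * Minimal illustrative sketches (hypothetical helpers): flush HDP after the
 * CPU writes data the GPU is about to read, and invalidate HDP before the
 * CPU reads data the GPU just wrote. A NULL ring selects the MMIO path.
 */
static void example_cpu_to_gpu_handoff(struct amdgpu_device *adev,
				       struct amdgpu_ring *ring)
{
	/* CPU has just written command/data buffers in visible VRAM... */
	amdgpu_device_flush_hdp(adev, ring);
	/* ...now it is safe to kick off the GPU work that reads them. */
}

static void example_gpu_to_cpu_handoff(struct amdgpu_device *adev)
{
	/* GPU work writing to visible VRAM has completed... */
	amdgpu_device_invalidate_hdp(adev, NULL);
	/* ...now the CPU can read the results coherently. */
}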
89a7a870
AG
6449int amdgpu_in_reset(struct amdgpu_device *adev)
6450{
6451 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
6452}
6453
34f3a4a9
LY
6454/**
6455 * amdgpu_device_halt() - bring hardware to some kind of halt state
6456 *
6457 * @adev: amdgpu_device pointer
6458 *
6459 * Bring hardware to some kind of halt state so that no one can touch it
6460 * any more. It helps to maintain error context when an error occurs.
6461 * Compared to a simple hang, the system will remain stable at least for SSH
6462 * access. Then it should be trivial to inspect the hardware state and
6463 * see what's going on. Implemented as follows:
6464 *
6465 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6466 * clears all CPU mappings to device, disallows remappings through page faults
6467 * 2. amdgpu_irq_disable_all() disables all interrupts
6468 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6469 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6470 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6471 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6472 * flush any in flight DMA operations
6473 */
6474void amdgpu_device_halt(struct amdgpu_device *adev)
6475{
6476 struct pci_dev *pdev = adev->pdev;
e0f943b4 6477 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 6478
2c1c7ba4 6479 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
6480 drm_dev_unplug(ddev);
6481
6482 amdgpu_irq_disable_all(adev);
6483
6484 amdgpu_fence_driver_hw_fini(adev);
6485
6486 adev->no_hw_access = true;
6487
6488 amdgpu_device_unmap_mmio(adev);
6489
6490 pci_disable_device(pdev);
6491 pci_wait_for_pending_transaction(pdev);
6492}
86700a40
XD
6493
6494u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6495 u32 reg)
6496{
6497 unsigned long flags, address, data;
6498 u32 r;
6499
6500 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6501 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6502
6503 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6504 WREG32(address, reg * 4);
6505 (void)RREG32(address);
6506 r = RREG32(data);
6507 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6508 return r;
6509}
6510
6511void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6512 u32 reg, u32 v)
6513{
6514 unsigned long flags, address, data;
6515
6516 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6517 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6518
6519 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6520 WREG32(address, reg * 4);
6521 (void)RREG32(address);
6522 WREG32(data, v);
6523 (void)RREG32(data);
6524 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6525}
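/*
 * Minimal illustrative sketch (hypothetical helper, placeholder register and
 * mask): a read-modify-write of a PCIe port register through the index/data
 * pair above.
 */
static void example_pcie_port_rmw(struct amdgpu_device *adev,
				  u32 reg, u32 mask, u32 value)
{
	u32 tmp;

	tmp = amdgpu_device_pcie_port_rreg(adev, reg);
	tmp = (tmp & ~mask) | (value & mask);
	amdgpu_device_pcie_port_wreg(adev, reg, tmp);
}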
68ce8b24
CK
6526
6527/**
6528 * amdgpu_device_switch_gang - switch to a new gang
6529 * @adev: amdgpu_device pointer
6530 * @gang: the gang to switch to
6531 *
6532 * Try to switch to a new gang.
6533 * Returns: NULL if we switched to the new gang or a reference to the current
6534 * gang leader.
6535 */
6536struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6537 struct dma_fence *gang)
6538{
6539 struct dma_fence *old = NULL;
6540
6541 do {
6542 dma_fence_put(old);
6543 rcu_read_lock();
6544 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6545 rcu_read_unlock();
6546
6547 if (old == gang)
6548 break;
6549
6550 if (!dma_fence_is_signaled(old))
6551 return old;
6552
6553 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6554 old, gang) != old);
6555
6556 dma_fence_put(old);
6557 return NULL;
6558}
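/*
 * Minimal illustrative sketch (hypothetical helper, not the driver's real
 * submission path): blockingly become the new gang leader by waiting out the
 * current one. Real callers feed the returned fence into the scheduler as a
 * dependency instead of sleeping on it.
 */
static long example_become_gang_leader(struct amdgpu_device *adev,
				       struct dma_fence *new_gang)
{
	struct dma_fence *old;
	long r;

	while ((old = amdgpu_device_switch_gang(adev, new_gang))) {
		/* The previous leader has not signaled yet: wait and retry. */
		r = dma_fence_wait(old, false);
		dma_fence_put(old);
		if (r)
			return r;
	}

	return 0;
}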
220c8cc8
AD
6559
6560bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6561{
6562 switch (adev->asic_type) {
6563#ifdef CONFIG_DRM_AMDGPU_SI
6564 case CHIP_HAINAN:
6565#endif
6566 case CHIP_TOPAZ:
6567 /* chips with no display hardware */
6568 return false;
6569#ifdef CONFIG_DRM_AMDGPU_SI
6570 case CHIP_TAHITI:
6571 case CHIP_PITCAIRN:
6572 case CHIP_VERDE:
6573 case CHIP_OLAND:
6574#endif
6575#ifdef CONFIG_DRM_AMDGPU_CIK
6576 case CHIP_BONAIRE:
6577 case CHIP_HAWAII:
6578 case CHIP_KAVERI:
6579 case CHIP_KABINI:
6580 case CHIP_MULLINS:
6581#endif
6582 case CHIP_TONGA:
6583 case CHIP_FIJI:
6584 case CHIP_POLARIS10:
6585 case CHIP_POLARIS11:
6586 case CHIP_POLARIS12:
6587 case CHIP_VEGAM:
6588 case CHIP_CARRIZO:
6589 case CHIP_STONEY:
6590 /* chips with display hardware */
6591 return true;
6592 default:
6593 /* IP discovery */
4e8303cf 6594 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
220c8cc8
AD
6595 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6596 return false;
6597 return true;
6598 }
6599}
81283fee
JZ
6600
6601uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6602 uint32_t inst, uint32_t reg_addr, char reg_name[],
6603 uint32_t expected_value, uint32_t mask)
6604{
6605 uint32_t ret = 0;
6606 uint32_t old_ = 0;
6607 uint32_t tmp_ = RREG32(reg_addr);
6608 uint32_t loop = adev->usec_timeout;
6609
6610 while ((tmp_ & (mask)) != (expected_value)) {
6611 if (old_ != tmp_) {
6612 loop = adev->usec_timeout;
6613 old_ = tmp_;
6614 } else
6615 udelay(1);
6616 tmp_ = RREG32(reg_addr);
6617 loop--;
6618 if (!loop) {
6619 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6620 inst, reg_name, (uint32_t)expected_value,
6621 (uint32_t)(tmp_ & (mask)));
6622 ret = -ETIMEDOUT;
6623 break;
6624 }
6625 }
6626 return ret;
6627}
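/*
 * Minimal illustrative sketch (hypothetical register offset, mask and name):
 * polling a status register until a "done" bit is set, using the helper
 * above. It waits up to adev->usec_timeout for (reg & mask) == expected.
 */
static int example_wait_for_done_bit(struct amdgpu_device *adev, u32 reg_addr)
{
	char name[] = "EXAMPLE_STATUS";	/* hypothetical register name */
	const u32 done_mask = 0x1;	/* hypothetical bit 0 == done */

	return amdgpu_device_wait_on_rreg(adev, 0, reg_addr, name,
					  done_mask, done_mask);
}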