drm/amdgpu: Add sysfs attribute to get board info
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
CommitLineData
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
3d8785f6
SA
35#include <linux/devcoredump.h>
36#include <generated/utsrelease.h>
08a2fd23 37#include <linux/pci-p2pdma.h>
d37a3929 38#include <linux/apple-gmux.h>
fdf2f6c5 39
b7cdb41e 40#include <drm/drm_aperture.h>
4562236b 41#include <drm/drm_atomic_helper.h>
973ad627 42#include <drm/drm_crtc_helper.h>
45b64fd9 43#include <drm/drm_fb_helper.h>
fcd70cd3 44#include <drm/drm_probe_helper.h>
d38ceaf9
AD
45#include <drm/amdgpu_drm.h>
46#include <linux/vgaarb.h>
47#include <linux/vga_switcheroo.h>
48#include <linux/efi.h>
49#include "amdgpu.h"
f4b373f4 50#include "amdgpu_trace.h"
d38ceaf9
AD
51#include "amdgpu_i2c.h"
52#include "atom.h"
53#include "amdgpu_atombios.h"
a5bde2f9 54#include "amdgpu_atomfirmware.h"
d0dd7f0c 55#include "amd_pcie.h"
33f34802
KW
56#ifdef CONFIG_DRM_AMDGPU_SI
57#include "si.h"
58#endif
a2e73f56
AD
59#ifdef CONFIG_DRM_AMDGPU_CIK
60#include "cik.h"
61#endif
aaa36a97 62#include "vi.h"
460826e6 63#include "soc15.h"
0a5b8c7b 64#include "nv.h"
d38ceaf9 65#include "bif/bif_4_1_d.h"
bec86378 66#include <linux/firmware.h>
89041940 67#include "amdgpu_vf_error.h"
d38ceaf9 68
ba997709 69#include "amdgpu_amdkfd.h"
d2f52ac8 70#include "amdgpu_pm.h"
d38ceaf9 71
5183411b 72#include "amdgpu_xgmi.h"
c030f2e4 73#include "amdgpu_ras.h"
9c7c85f7 74#include "amdgpu_pmu.h"
bd607166 75#include "amdgpu_fru_eeprom.h"
04442bf7 76#include "amdgpu_reset.h"
5183411b 77
d5ea093e 78#include <linux/suspend.h>
c6a6e2db 79#include <drm/task_barrier.h>
3f12acc8 80#include <linux/pm_runtime.h>
d5ea093e 81
f89f8c6b
AG
82#include <drm/drm_drv.h>
83
3ad5dcfe
KHF
84#if IS_ENABLED(CONFIG_X86)
85#include <asm/intel-family.h>
86#endif
87
e2a75f88 88MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 89MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 90MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 91MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 92MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 93MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 94MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 95
2dc80b00 96#define AMDGPU_RESUME_MS 2000
7258fa31
SK
97#define AMDGPU_MAX_RETRY_LIMIT 2
98#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 99
b7cdb41e
ML
100static const struct drm_driver amdgpu_kms_driver;
101
050091ab 102const char *amdgpu_asic_name[] = {
da69c161
KW
103 "TAHITI",
104 "PITCAIRN",
105 "VERDE",
106 "OLAND",
107 "HAINAN",
d38ceaf9
AD
108 "BONAIRE",
109 "KAVERI",
110 "KABINI",
111 "HAWAII",
112 "MULLINS",
113 "TOPAZ",
114 "TONGA",
48299f95 115 "FIJI",
d38ceaf9 116 "CARRIZO",
139f4917 117 "STONEY",
2cc0c0b5
FC
118 "POLARIS10",
119 "POLARIS11",
c4642a47 120 "POLARIS12",
48ff108d 121 "VEGAM",
d4196f01 122 "VEGA10",
8fab806a 123 "VEGA12",
956fcddc 124 "VEGA20",
2ca8a5d2 125 "RAVEN",
d6c3b24e 126 "ARCTURUS",
1eee4228 127 "RENOIR",
d46b417a 128 "ALDEBARAN",
852a6626 129 "NAVI10",
d0f56dc2 130 "CYAN_SKILLFISH",
87dbad02 131 "NAVI14",
9802f5d7 132 "NAVI12",
ccaf72d3 133 "SIENNA_CICHLID",
ddd8fbe7 134 "NAVY_FLOUNDER",
4f1e9a76 135 "VANGOGH",
a2468e04 136 "DIMGREY_CAVEFISH",
6f169591 137 "BEIGE_GOBY",
ee9236b7 138 "YELLOW_CARP",
3ae695d6 139 "IP DISCOVERY",
d38ceaf9
AD
140 "LAST",
141};
142
dcea6e65
KR
143/**
144 * DOC: pcie_replay_count
145 *
146 * The amdgpu driver provides a sysfs API for reporting the total number
147 * of PCIe replays (NAKs)
148 * The file pcie_replay_count is used for this and returns the total
149 * number of replays as a sum of the NAKs generated and NAKs received
150 */
151
152static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 156 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
157 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158
36000c7a 159 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
160}
161
b8920e1e 162static DEVICE_ATTR(pcie_replay_count, 0444,
dcea6e65
KR
163 amdgpu_device_get_pcie_replay_count, NULL);
164
76da73f0
LL
165static ssize_t amdgpu_device_get_board_info(struct device *dev,
166 struct device_attribute *attr,
167 char *buf)
168{
169 struct drm_device *ddev = dev_get_drvdata(dev);
170 struct amdgpu_device *adev = drm_to_adev(ddev);
171 enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
172 const char *pkg;
173
174 if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
175 pkg_type = adev->smuio.funcs->get_pkg_type(adev);
176
177 switch (pkg_type) {
178 case AMDGPU_PKG_TYPE_CEM:
179 pkg = "cem";
180 break;
181 case AMDGPU_PKG_TYPE_OAM:
182 pkg = "oam";
183 break;
184 default:
185 pkg = "unknown";
186 break;
187 }
188
189 return sysfs_emit(buf, "%s : %s\n", "type", pkg);
190}
191
192static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
193
194static struct attribute *amdgpu_board_attrs[] = {
195 &dev_attr_board_info.attr,
196 NULL,
197};
198
199static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
200 struct attribute *attr, int n)
201{
202 struct device *dev = kobj_to_dev(kobj);
203 struct drm_device *ddev = dev_get_drvdata(dev);
204 struct amdgpu_device *adev = drm_to_adev(ddev);
205
206 if (adev->flags & AMD_IS_APU)
207 return 0;
208
209 return attr->mode;
210}
211
212static const struct attribute_group amdgpu_board_attrs_group = {
213 .attrs = amdgpu_board_attrs,
214 .is_visible = amdgpu_board_attrs_is_visible
215};
216
5494d864
AD
217static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
218
bd607166 219
fd496ca8 220/**
b98c6299 221 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
222 *
223 * @dev: drm_device pointer
224 *
b98c6299 225 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
226 * otherwise return false.
227 */
b98c6299 228bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
229{
230 struct amdgpu_device *adev = drm_to_adev(dev);
231
b98c6299 232 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
233 return true;
234 return false;
235}
236
e3ecdffa 237/**
0330b848 238 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
239 *
240 * @dev: drm_device pointer
241 *
b98c6299 242 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
243 * otherwise return false.
244 */
31af062a 245bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 246{
1348969a 247 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 248
b98c6299
AD
249 if (adev->has_pr3 ||
250 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
251 return true;
252 return false;
253}
254
a69cba42
AD
255/**
256 * amdgpu_device_supports_baco - Does the device support BACO
257 *
258 * @dev: drm_device pointer
259 *
260 * Returns true if the device supporte BACO,
261 * otherwise return false.
262 */
263bool amdgpu_device_supports_baco(struct drm_device *dev)
264{
1348969a 265 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
266
267 return amdgpu_asic_supports_baco(adev);
268}
269
3fa8f89d
S
270/**
271 * amdgpu_device_supports_smart_shift - Is the device dGPU with
272 * smart shift support
273 *
274 * @dev: drm_device pointer
275 *
276 * Returns true if the device is a dGPU with Smart Shift support,
277 * otherwise returns false.
278 */
279bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
280{
281 return (amdgpu_device_supports_boco(dev) &&
282 amdgpu_acpi_is_power_shift_control_supported());
283}
284
6e3cd2a9
MCC
285/*
286 * VRAM access helper functions
287 */
288
e35e2b11 289/**
048af66b 290 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
291 *
292 * @adev: amdgpu_device pointer
293 * @pos: offset of the buffer in vram
294 * @buf: virtual address of the buffer in system memory
295 * @size: read/write size, sizeof(@buf) must > @size
296 * @write: true - write to vram, otherwise - read from vram
297 */
048af66b
KW
298void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
299 void *buf, size_t size, bool write)
e35e2b11 300{
e35e2b11 301 unsigned long flags;
048af66b
KW
302 uint32_t hi = ~0, tmp = 0;
303 uint32_t *data = buf;
ce05ac56 304 uint64_t last;
f89f8c6b 305 int idx;
ce05ac56 306
c58a863b 307 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 308 return;
9d11eb0d 309
048af66b
KW
310 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
311
312 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
313 for (last = pos + size; pos < last; pos += 4) {
314 tmp = pos >> 31;
315
316 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
317 if (tmp != hi) {
318 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
319 hi = tmp;
320 }
321 if (write)
322 WREG32_NO_KIQ(mmMM_DATA, *data++);
323 else
324 *data++ = RREG32_NO_KIQ(mmMM_DATA);
325 }
326
327 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
328 drm_dev_exit(idx);
329}
330
331/**
bbe04dec 332 * amdgpu_device_aper_access - access vram by vram aperature
048af66b
KW
333 *
334 * @adev: amdgpu_device pointer
335 * @pos: offset of the buffer in vram
336 * @buf: virtual address of the buffer in system memory
337 * @size: read/write size, sizeof(@buf) must > @size
338 * @write: true - write to vram, otherwise - read from vram
339 *
340 * The return value means how many bytes have been transferred.
341 */
342size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
343 void *buf, size_t size, bool write)
344{
9d11eb0d 345#ifdef CONFIG_64BIT
048af66b
KW
346 void __iomem *addr;
347 size_t count = 0;
348 uint64_t last;
349
350 if (!adev->mman.aper_base_kaddr)
351 return 0;
352
9d11eb0d
CK
353 last = min(pos + size, adev->gmc.visible_vram_size);
354 if (last > pos) {
048af66b
KW
355 addr = adev->mman.aper_base_kaddr + pos;
356 count = last - pos;
9d11eb0d
CK
357
358 if (write) {
359 memcpy_toio(addr, buf, count);
4c452b5c
SS
360 /* Make sure HDP write cache flush happens without any reordering
361 * after the system memory contents are sent over PCIe device
362 */
9d11eb0d 363 mb();
810085dd 364 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 365 } else {
810085dd 366 amdgpu_device_invalidate_hdp(adev, NULL);
4c452b5c
SS
367 /* Make sure HDP read cache is invalidated before issuing a read
368 * to the PCIe device
369 */
9d11eb0d
CK
370 mb();
371 memcpy_fromio(buf, addr, count);
372 }
373
9d11eb0d 374 }
048af66b
KW
375
376 return count;
377#else
378 return 0;
9d11eb0d 379#endif
048af66b 380}
9d11eb0d 381
048af66b
KW
382/**
383 * amdgpu_device_vram_access - read/write a buffer in vram
384 *
385 * @adev: amdgpu_device pointer
386 * @pos: offset of the buffer in vram
387 * @buf: virtual address of the buffer in system memory
388 * @size: read/write size, sizeof(@buf) must > @size
389 * @write: true - write to vram, otherwise - read from vram
390 */
391void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
392 void *buf, size_t size, bool write)
393{
394 size_t count;
e35e2b11 395
048af66b
KW
396 /* try to using vram apreature to access vram first */
397 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
398 size -= count;
399 if (size) {
400 /* using MM to access rest vram */
401 pos += count;
402 buf += count;
403 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
404 }
405}
406
d38ceaf9 407/*
f7ee1874 408 * register access helper functions.
d38ceaf9 409 */
56b53c0b
DL
410
411/* Check if hw access should be skipped because of hotplug or device error */
412bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
413{
7afefb81 414 if (adev->no_hw_access)
56b53c0b
DL
415 return true;
416
417#ifdef CONFIG_LOCKDEP
418 /*
419 * This is a bit complicated to understand, so worth a comment. What we assert
420 * here is that the GPU reset is not running on another thread in parallel.
421 *
422 * For this we trylock the read side of the reset semaphore, if that succeeds
423 * we know that the reset is not running in paralell.
424 *
425 * If the trylock fails we assert that we are either already holding the read
426 * side of the lock or are the reset thread itself and hold the write side of
427 * the lock.
428 */
429 if (in_task()) {
d0fb18b5
AG
430 if (down_read_trylock(&adev->reset_domain->sem))
431 up_read(&adev->reset_domain->sem);
56b53c0b 432 else
d0fb18b5 433 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
434 }
435#endif
436 return false;
437}
438
e3ecdffa 439/**
f7ee1874 440 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
441 *
442 * @adev: amdgpu_device pointer
443 * @reg: dword aligned register offset
444 * @acc_flags: access flags which require special behavior
445 *
446 * Returns the 32 bit value from the offset specified.
447 */
f7ee1874
HZ
448uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
449 uint32_t reg, uint32_t acc_flags)
d38ceaf9 450{
f4b373f4
TSD
451 uint32_t ret;
452
56b53c0b 453 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
454 return 0;
455
f7ee1874
HZ
456 if ((reg * 4) < adev->rmmio_size) {
457 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
458 amdgpu_sriov_runtime(adev) &&
d0fb18b5 459 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 460 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 461 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
462 } else {
463 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
464 }
465 } else {
466 ret = adev->pcie_rreg(adev, reg * 4);
81202807 467 }
bc992ba5 468
f7ee1874 469 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 470
f4b373f4 471 return ret;
d38ceaf9
AD
472}
473
421a2a30
ML
474/*
475 * MMIO register read with bytes helper functions
476 * @offset:bytes offset from MMIO start
b8920e1e 477 */
421a2a30 478
e3ecdffa
AD
479/**
480 * amdgpu_mm_rreg8 - read a memory mapped IO register
481 *
482 * @adev: amdgpu_device pointer
483 * @offset: byte aligned register offset
484 *
485 * Returns the 8 bit value from the offset specified.
486 */
7cbbc745
AG
487uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
488{
56b53c0b 489 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
490 return 0;
491
421a2a30
ML
492 if (offset < adev->rmmio_size)
493 return (readb(adev->rmmio + offset));
494 BUG();
495}
496
497/*
498 * MMIO register write with bytes helper functions
499 * @offset:bytes offset from MMIO start
500 * @value: the value want to be written to the register
b8920e1e
SS
501 */
502
e3ecdffa
AD
503/**
504 * amdgpu_mm_wreg8 - read a memory mapped IO register
505 *
506 * @adev: amdgpu_device pointer
507 * @offset: byte aligned register offset
508 * @value: 8 bit value to write
509 *
510 * Writes the value specified to the offset specified.
511 */
7cbbc745
AG
512void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
513{
56b53c0b 514 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
515 return;
516
421a2a30
ML
517 if (offset < adev->rmmio_size)
518 writeb(value, adev->rmmio + offset);
519 else
520 BUG();
521}
522
e3ecdffa 523/**
f7ee1874 524 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
525 *
526 * @adev: amdgpu_device pointer
527 * @reg: dword aligned register offset
528 * @v: 32 bit value to write to the register
529 * @acc_flags: access flags which require special behavior
530 *
531 * Writes the value specified to the offset specified.
532 */
f7ee1874
HZ
533void amdgpu_device_wreg(struct amdgpu_device *adev,
534 uint32_t reg, uint32_t v,
535 uint32_t acc_flags)
d38ceaf9 536{
56b53c0b 537 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
538 return;
539
f7ee1874
HZ
540 if ((reg * 4) < adev->rmmio_size) {
541 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
542 amdgpu_sriov_runtime(adev) &&
d0fb18b5 543 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 544 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 545 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
546 } else {
547 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
548 }
549 } else {
550 adev->pcie_wreg(adev, reg * 4, v);
81202807 551 }
bc992ba5 552
f7ee1874 553 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 554}
d38ceaf9 555
03f2abb0 556/**
4cc9f86f 557 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 558 *
71579346
RB
559 * @adev: amdgpu_device pointer
560 * @reg: mmio/rlc register
561 * @v: value to write
8057a9d6 562 * @xcc_id: xcc accelerated compute core id
71579346
RB
563 *
564 * this function is invoked only for the debugfs register access
03f2abb0 565 */
f7ee1874 566void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
8ed49dd1
VL
567 uint32_t reg, uint32_t v,
568 uint32_t xcc_id)
2e0cc4d4 569{
56b53c0b 570 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
571 return;
572
2e0cc4d4 573 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
574 adev->gfx.rlc.funcs &&
575 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 576 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
8ed49dd1 577 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
4cc9f86f
TSD
578 } else if ((reg * 4) >= adev->rmmio_size) {
579 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
580 } else {
581 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 582 }
d38ceaf9
AD
583}
584
1bba3683
HZ
585/**
586 * amdgpu_device_indirect_rreg - read an indirect register
587 *
588 * @adev: amdgpu_device pointer
22f453fb 589 * @reg_addr: indirect register address to read from
1bba3683
HZ
590 *
591 * Returns the value of indirect register @reg_addr
592 */
593u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
594 u32 reg_addr)
595{
65ba96e9 596 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
597 void __iomem *pcie_index_offset;
598 void __iomem *pcie_data_offset;
65ba96e9
HZ
599 u32 r;
600
601 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
602 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
603
604 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
605 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
606 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
607
608 writel(reg_addr, pcie_index_offset);
609 readl(pcie_index_offset);
610 r = readl(pcie_data_offset);
611 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
612
613 return r;
614}
615
0c552ed3
LM
616u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
617 u64 reg_addr)
618{
619 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
620 u32 r;
621 void __iomem *pcie_index_offset;
622 void __iomem *pcie_index_hi_offset;
623 void __iomem *pcie_data_offset;
624
625 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
626 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 627 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
628 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
629 else
630 pcie_index_hi = 0;
631
632 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
633 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
634 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
635 if (pcie_index_hi != 0)
636 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
637 pcie_index_hi * 4;
638
639 writel(reg_addr, pcie_index_offset);
640 readl(pcie_index_offset);
641 if (pcie_index_hi != 0) {
642 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
643 readl(pcie_index_hi_offset);
644 }
645 r = readl(pcie_data_offset);
646
647 /* clear the high bits */
648 if (pcie_index_hi != 0) {
649 writel(0, pcie_index_hi_offset);
650 readl(pcie_index_hi_offset);
651 }
652
653 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
654
655 return r;
656}
657
1bba3683
HZ
658/**
659 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
660 *
661 * @adev: amdgpu_device pointer
22f453fb 662 * @reg_addr: indirect register address to read from
1bba3683
HZ
663 *
664 * Returns the value of indirect register @reg_addr
665 */
666u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
667 u32 reg_addr)
668{
65ba96e9 669 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
670 void __iomem *pcie_index_offset;
671 void __iomem *pcie_data_offset;
65ba96e9
HZ
672 u64 r;
673
674 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
675 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
676
677 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
678 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
679 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
680
681 /* read low 32 bits */
682 writel(reg_addr, pcie_index_offset);
683 readl(pcie_index_offset);
684 r = readl(pcie_data_offset);
685 /* read high 32 bits */
686 writel(reg_addr + 4, pcie_index_offset);
687 readl(pcie_index_offset);
688 r |= ((u64)readl(pcie_data_offset) << 32);
689 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
690
691 return r;
692}
693
a76b2870
CL
694u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
695 u64 reg_addr)
696{
697 unsigned long flags, pcie_index, pcie_data;
698 unsigned long pcie_index_hi = 0;
699 void __iomem *pcie_index_offset;
700 void __iomem *pcie_index_hi_offset;
701 void __iomem *pcie_data_offset;
702 u64 r;
703
704 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
705 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
706 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
707 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
708
709 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
710 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
711 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
712 if (pcie_index_hi != 0)
713 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
714 pcie_index_hi * 4;
715
716 /* read low 32 bits */
717 writel(reg_addr, pcie_index_offset);
718 readl(pcie_index_offset);
719 if (pcie_index_hi != 0) {
720 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
721 readl(pcie_index_hi_offset);
722 }
723 r = readl(pcie_data_offset);
724 /* read high 32 bits */
725 writel(reg_addr + 4, pcie_index_offset);
726 readl(pcie_index_offset);
727 if (pcie_index_hi != 0) {
728 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
729 readl(pcie_index_hi_offset);
730 }
731 r |= ((u64)readl(pcie_data_offset) << 32);
732
733 /* clear the high bits */
734 if (pcie_index_hi != 0) {
735 writel(0, pcie_index_hi_offset);
736 readl(pcie_index_hi_offset);
737 }
738
739 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
740
741 return r;
742}
743
1bba3683
HZ
744/**
745 * amdgpu_device_indirect_wreg - write an indirect register address
746 *
747 * @adev: amdgpu_device pointer
1bba3683
HZ
748 * @reg_addr: indirect register offset
749 * @reg_data: indirect register data
750 *
751 */
752void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
753 u32 reg_addr, u32 reg_data)
754{
65ba96e9 755 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
756 void __iomem *pcie_index_offset;
757 void __iomem *pcie_data_offset;
758
65ba96e9
HZ
759 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
760 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
761
1bba3683
HZ
762 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
763 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
764 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
765
766 writel(reg_addr, pcie_index_offset);
767 readl(pcie_index_offset);
768 writel(reg_data, pcie_data_offset);
769 readl(pcie_data_offset);
770 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
771}
772
0c552ed3
LM
773void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
774 u64 reg_addr, u32 reg_data)
775{
776 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
777 void __iomem *pcie_index_offset;
778 void __iomem *pcie_index_hi_offset;
779 void __iomem *pcie_data_offset;
780
781 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
782 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 783 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
784 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
785 else
786 pcie_index_hi = 0;
787
788 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
789 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
790 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
791 if (pcie_index_hi != 0)
792 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
793 pcie_index_hi * 4;
794
795 writel(reg_addr, pcie_index_offset);
796 readl(pcie_index_offset);
797 if (pcie_index_hi != 0) {
798 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
799 readl(pcie_index_hi_offset);
800 }
801 writel(reg_data, pcie_data_offset);
802 readl(pcie_data_offset);
803
804 /* clear the high bits */
805 if (pcie_index_hi != 0) {
806 writel(0, pcie_index_hi_offset);
807 readl(pcie_index_hi_offset);
808 }
809
810 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
811}
812
1bba3683
HZ
813/**
814 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
815 *
816 * @adev: amdgpu_device pointer
1bba3683
HZ
817 * @reg_addr: indirect register offset
818 * @reg_data: indirect register data
819 *
820 */
821void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
822 u32 reg_addr, u64 reg_data)
823{
65ba96e9 824 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
825 void __iomem *pcie_index_offset;
826 void __iomem *pcie_data_offset;
827
65ba96e9
HZ
828 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
829 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
830
1bba3683
HZ
831 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
832 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
833 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
834
835 /* write low 32 bits */
836 writel(reg_addr, pcie_index_offset);
837 readl(pcie_index_offset);
838 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
839 readl(pcie_data_offset);
840 /* write high 32 bits */
841 writel(reg_addr + 4, pcie_index_offset);
842 readl(pcie_index_offset);
843 writel((u32)(reg_data >> 32), pcie_data_offset);
844 readl(pcie_data_offset);
845 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
846}
847
a76b2870
CL
848void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
849 u64 reg_addr, u64 reg_data)
850{
851 unsigned long flags, pcie_index, pcie_data;
852 unsigned long pcie_index_hi = 0;
853 void __iomem *pcie_index_offset;
854 void __iomem *pcie_index_hi_offset;
855 void __iomem *pcie_data_offset;
856
857 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
858 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
859 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
860 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
861
862 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
863 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
864 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
865 if (pcie_index_hi != 0)
866 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
867 pcie_index_hi * 4;
868
869 /* write low 32 bits */
870 writel(reg_addr, pcie_index_offset);
871 readl(pcie_index_offset);
872 if (pcie_index_hi != 0) {
873 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
874 readl(pcie_index_hi_offset);
875 }
876 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
877 readl(pcie_data_offset);
878 /* write high 32 bits */
879 writel(reg_addr + 4, pcie_index_offset);
880 readl(pcie_index_offset);
881 if (pcie_index_hi != 0) {
882 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
883 readl(pcie_index_hi_offset);
884 }
885 writel((u32)(reg_data >> 32), pcie_data_offset);
886 readl(pcie_data_offset);
887
888 /* clear the high bits */
889 if (pcie_index_hi != 0) {
890 writel(0, pcie_index_hi_offset);
891 readl(pcie_index_hi_offset);
892 }
893
894 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
895}
896
dabc114e
HZ
897/**
898 * amdgpu_device_get_rev_id - query device rev_id
899 *
900 * @adev: amdgpu_device pointer
901 *
902 * Return device rev_id
903 */
904u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
905{
906 return adev->nbio.funcs->get_rev_id(adev);
907}
908
d38ceaf9
AD
909/**
910 * amdgpu_invalid_rreg - dummy reg read function
911 *
982a820b 912 * @adev: amdgpu_device pointer
d38ceaf9
AD
913 * @reg: offset of register
914 *
915 * Dummy register read function. Used for register blocks
916 * that certain asics don't have (all asics).
917 * Returns the value in the register.
918 */
919static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
920{
921 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
922 BUG();
923 return 0;
924}
925
0c552ed3
LM
926static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
927{
928 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
929 BUG();
930 return 0;
931}
932
d38ceaf9
AD
933/**
934 * amdgpu_invalid_wreg - dummy reg write function
935 *
982a820b 936 * @adev: amdgpu_device pointer
d38ceaf9
AD
937 * @reg: offset of register
938 * @v: value to write to the register
939 *
940 * Dummy register read function. Used for register blocks
941 * that certain asics don't have (all asics).
942 */
943static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
944{
945 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
946 reg, v);
947 BUG();
948}
949
0c552ed3
LM
950static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
951{
952 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
953 reg, v);
954 BUG();
955}
956
4fa1c6a6
TZ
957/**
958 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
959 *
982a820b 960 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
961 * @reg: offset of register
962 *
963 * Dummy register read function. Used for register blocks
964 * that certain asics don't have (all asics).
965 * Returns the value in the register.
966 */
967static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
968{
969 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
970 BUG();
971 return 0;
972}
973
a76b2870
CL
974static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
975{
976 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
977 BUG();
978 return 0;
979}
980
4fa1c6a6
TZ
981/**
982 * amdgpu_invalid_wreg64 - dummy reg write function
983 *
982a820b 984 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
985 * @reg: offset of register
986 * @v: value to write to the register
987 *
988 * Dummy register read function. Used for register blocks
989 * that certain asics don't have (all asics).
990 */
991static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
992{
993 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
994 reg, v);
995 BUG();
996}
997
a76b2870
CL
998static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
999{
1000 DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1001 reg, v);
1002 BUG();
1003}
1004
d38ceaf9
AD
1005/**
1006 * amdgpu_block_invalid_rreg - dummy reg read function
1007 *
982a820b 1008 * @adev: amdgpu_device pointer
d38ceaf9
AD
1009 * @block: offset of instance
1010 * @reg: offset of register
1011 *
1012 * Dummy register read function. Used for register blocks
1013 * that certain asics don't have (all asics).
1014 * Returns the value in the register.
1015 */
1016static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1017 uint32_t block, uint32_t reg)
1018{
1019 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1020 reg, block);
1021 BUG();
1022 return 0;
1023}
1024
1025/**
1026 * amdgpu_block_invalid_wreg - dummy reg write function
1027 *
982a820b 1028 * @adev: amdgpu_device pointer
d38ceaf9
AD
1029 * @block: offset of instance
1030 * @reg: offset of register
1031 * @v: value to write to the register
1032 *
1033 * Dummy register read function. Used for register blocks
1034 * that certain asics don't have (all asics).
1035 */
1036static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1037 uint32_t block,
1038 uint32_t reg, uint32_t v)
1039{
1040 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1041 reg, block, v);
1042 BUG();
1043}
1044
4d2997ab
AD
1045/**
1046 * amdgpu_device_asic_init - Wrapper for atom asic_init
1047 *
982a820b 1048 * @adev: amdgpu_device pointer
4d2997ab
AD
1049 *
1050 * Does any asic specific work and then calls atom asic init.
1051 */
1052static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1053{
15c5c5f5
LL
1054 int ret;
1055
4d2997ab
AD
1056 amdgpu_asic_pre_asic_init(adev);
1057
4e8303cf
LL
1058 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1059 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
15c5c5f5
LL
1060 amdgpu_psp_wait_for_bootloader(adev);
1061 ret = amdgpu_atomfirmware_asic_init(adev, true);
1062 return ret;
1063 } else {
85d1bcc6 1064 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
15c5c5f5
LL
1065 }
1066
1067 return 0;
4d2997ab
AD
1068}
1069
e3ecdffa 1070/**
7ccfd79f 1071 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 1072 *
982a820b 1073 * @adev: amdgpu_device pointer
e3ecdffa
AD
1074 *
1075 * Allocates a scratch page of VRAM for use by various things in the
1076 * driver.
1077 */
7ccfd79f 1078static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 1079{
7ccfd79f
CK
1080 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1081 AMDGPU_GEM_DOMAIN_VRAM |
1082 AMDGPU_GEM_DOMAIN_GTT,
1083 &adev->mem_scratch.robj,
1084 &adev->mem_scratch.gpu_addr,
1085 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
1086}
1087
e3ecdffa 1088/**
7ccfd79f 1089 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 1090 *
982a820b 1091 * @adev: amdgpu_device pointer
e3ecdffa
AD
1092 *
1093 * Frees the VRAM scratch page.
1094 */
7ccfd79f 1095static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 1096{
7ccfd79f 1097 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
1098}
1099
1100/**
9c3f2b54 1101 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
1102 *
1103 * @adev: amdgpu_device pointer
1104 * @registers: pointer to the register array
1105 * @array_size: size of the register array
1106 *
b8920e1e 1107 * Programs an array or registers with and or masks.
d38ceaf9
AD
1108 * This is a helper for setting golden registers.
1109 */
9c3f2b54
AD
1110void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1111 const u32 *registers,
1112 const u32 array_size)
d38ceaf9
AD
1113{
1114 u32 tmp, reg, and_mask, or_mask;
1115 int i;
1116
1117 if (array_size % 3)
1118 return;
1119
47fc644f 1120 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1121 reg = registers[i + 0];
1122 and_mask = registers[i + 1];
1123 or_mask = registers[i + 2];
1124
1125 if (and_mask == 0xffffffff) {
1126 tmp = or_mask;
1127 } else {
1128 tmp = RREG32(reg);
1129 tmp &= ~and_mask;
e0d07657
HZ
1130 if (adev->family >= AMDGPU_FAMILY_AI)
1131 tmp |= (or_mask & and_mask);
1132 else
1133 tmp |= or_mask;
d38ceaf9
AD
1134 }
1135 WREG32(reg, tmp);
1136 }
1137}
1138
e3ecdffa
AD
1139/**
1140 * amdgpu_device_pci_config_reset - reset the GPU
1141 *
1142 * @adev: amdgpu_device pointer
1143 *
1144 * Resets the GPU using the pci config reset sequence.
1145 * Only applicable to asics prior to vega10.
1146 */
8111c387 1147void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1148{
1149 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1150}
1151
af484df8
AD
1152/**
1153 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1154 *
1155 * @adev: amdgpu_device pointer
1156 *
1157 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1158 */
1159int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1160{
1161 return pci_reset_function(adev->pdev);
1162}
1163
d38ceaf9 1164/*
06ec9070 1165 * amdgpu_device_wb_*()
455a7bc2 1166 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1167 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
1168 */
1169
1170/**
06ec9070 1171 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1172 *
1173 * @adev: amdgpu_device pointer
1174 *
1175 * Disables Writeback and frees the Writeback memory (all asics).
1176 * Used at driver shutdown.
1177 */
06ec9070 1178static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1179{
1180 if (adev->wb.wb_obj) {
a76ed485
AD
1181 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1182 &adev->wb.gpu_addr,
1183 (void **)&adev->wb.wb);
d38ceaf9
AD
1184 adev->wb.wb_obj = NULL;
1185 }
1186}
1187
1188/**
03f2abb0 1189 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1190 *
1191 * @adev: amdgpu_device pointer
1192 *
455a7bc2 1193 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1194 * Used at driver startup.
1195 * Returns 0 on success or an -error on failure.
1196 */
06ec9070 1197static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1198{
1199 int r;
1200
1201 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1202 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1203 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1204 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1205 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1206 (void **)&adev->wb.wb);
d38ceaf9
AD
1207 if (r) {
1208 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1209 return r;
1210 }
d38ceaf9
AD
1211
1212 adev->wb.num_wb = AMDGPU_MAX_WB;
1213 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1214
1215 /* clear wb memory */
73469585 1216 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1217 }
1218
1219 return 0;
1220}
1221
1222/**
131b4b36 1223 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1224 *
1225 * @adev: amdgpu_device pointer
1226 * @wb: wb index
1227 *
1228 * Allocate a wb slot for use by the driver (all asics).
1229 * Returns 0 on success or -EINVAL on failure.
1230 */
131b4b36 1231int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1232{
1233 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1234
97407b63 1235 if (offset < adev->wb.num_wb) {
7014285a 1236 __set_bit(offset, adev->wb.used);
63ae07ca 1237 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1238 return 0;
1239 } else {
1240 return -EINVAL;
1241 }
1242}
1243
d38ceaf9 1244/**
131b4b36 1245 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1246 *
1247 * @adev: amdgpu_device pointer
1248 * @wb: wb index
1249 *
1250 * Free a wb slot allocated for use by the driver (all asics)
1251 */
131b4b36 1252void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1253{
73469585 1254 wb >>= 3;
d38ceaf9 1255 if (wb < adev->wb.num_wb)
73469585 1256 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1257}
1258
d6895ad3
CK
1259/**
1260 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1261 *
1262 * @adev: amdgpu_device pointer
1263 *
1264 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1265 * to fail, but if any of the BARs is not accessible after the size we abort
1266 * driver loading by returning -ENODEV.
1267 */
1268int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1269{
453f617a 1270 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1271 struct pci_bus *root;
1272 struct resource *res;
b8920e1e 1273 unsigned int i;
d6895ad3
CK
1274 u16 cmd;
1275 int r;
1276
822130b5
AB
1277 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1278 return 0;
1279
0c03b912 1280 /* Bypass for VF */
1281 if (amdgpu_sriov_vf(adev))
1282 return 0;
1283
b7221f2b
AD
1284 /* skip if the bios has already enabled large BAR */
1285 if (adev->gmc.real_vram_size &&
1286 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1287 return 0;
1288
31b8adab
CK
1289 /* Check if the root BUS has 64bit memory resources */
1290 root = adev->pdev->bus;
1291 while (root->parent)
1292 root = root->parent;
1293
1294 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1295 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1296 res->start > 0x100000000ull)
1297 break;
1298 }
1299
1300 /* Trying to resize is pointless without a root hub window above 4GB */
1301 if (!res)
1302 return 0;
1303
453f617a
ND
1304 /* Limit the BAR size to what is available */
1305 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1306 rbar_size);
1307
d6895ad3
CK
1308 /* Disable memory decoding while we change the BAR addresses and size */
1309 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1310 pci_write_config_word(adev->pdev, PCI_COMMAND,
1311 cmd & ~PCI_COMMAND_MEMORY);
1312
1313 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1314 amdgpu_doorbell_fini(adev);
d6895ad3
CK
1315 if (adev->asic_type >= CHIP_BONAIRE)
1316 pci_release_resource(adev->pdev, 2);
1317
1318 pci_release_resource(adev->pdev, 0);
1319
1320 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1321 if (r == -ENOSPC)
1322 DRM_INFO("Not enough PCI address space for a large BAR.");
1323 else if (r && r != -ENOTSUPP)
1324 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1325
1326 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1327
1328 /* When the doorbell or fb BAR isn't available we have no chance of
1329 * using the device.
1330 */
43c064db 1331 r = amdgpu_doorbell_init(adev);
d6895ad3
CK
1332 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1333 return -ENODEV;
1334
1335 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1336
1337 return 0;
1338}
a05502e5 1339
9535a86a
SZ
1340static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1341{
b8920e1e 1342 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
9535a86a 1343 return false;
9535a86a
SZ
1344
1345 return true;
1346}
1347
d38ceaf9
AD
1348/*
1349 * GPU helpers function.
1350 */
1351/**
39c640c0 1352 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1353 *
1354 * @adev: amdgpu_device pointer
1355 *
c836fec5
JQ
1356 * Check if the asic has been initialized (all asics) at driver startup
1357 * or post is needed if hw reset is performed.
1358 * Returns true if need or false if not.
d38ceaf9 1359 */
39c640c0 1360bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1361{
1362 uint32_t reg;
1363
bec86378
ML
1364 if (amdgpu_sriov_vf(adev))
1365 return false;
1366
9535a86a
SZ
1367 if (!amdgpu_device_read_bios(adev))
1368 return false;
1369
bec86378 1370 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1371 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1372 * some old smc fw still need driver do vPost otherwise gpu hang, while
1373 * those smc fw version above 22.15 doesn't have this flaw, so we force
1374 * vpost executed for smc version below 22.15
bec86378
ML
1375 */
1376 if (adev->asic_type == CHIP_FIJI) {
1377 int err;
1378 uint32_t fw_ver;
b8920e1e 1379
bec86378
ML
1380 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1381 /* force vPost if error occured */
1382 if (err)
1383 return true;
1384
1385 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1386 if (fw_ver < 0x00160e00)
1387 return true;
bec86378 1388 }
bec86378 1389 }
91fe77eb 1390
e3c1b071 1391 /* Don't post if we need to reset whole hive on init */
1392 if (adev->gmc.xgmi.pending_reset)
1393 return false;
1394
91fe77eb 1395 if (adev->has_hw_reset) {
1396 adev->has_hw_reset = false;
1397 return true;
1398 }
1399
1400 /* bios scratch used on CIK+ */
1401 if (adev->asic_type >= CHIP_BONAIRE)
1402 return amdgpu_atombios_scratch_need_asic_init(adev);
1403
1404 /* check MEM_SIZE for older asics */
1405 reg = amdgpu_asic_get_config_memsize(adev);
1406
1407 if ((reg != 0) && (reg != 0xffffffff))
1408 return false;
1409
1410 return true;
70e64c4d
ML
1411}
1412
bb0f8429
ML
1413/*
1414 * Check whether seamless boot is supported.
1415 *
7f4ce7b5
ML
1416 * So far we only support seamless boot on DCE 3.0 or later.
1417 * If users report that it works on older ASICS as well, we may
1418 * loosen this.
bb0f8429
ML
1419 */
1420bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1421{
5dc270d3
ML
1422 switch (amdgpu_seamless) {
1423 case -1:
1424 break;
1425 case 1:
1426 return true;
1427 case 0:
1428 return false;
1429 default:
1430 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1431 amdgpu_seamless);
1432 return false;
1433 }
1434
3657a1d5
ML
1435 if (!(adev->flags & AMD_IS_APU))
1436 return false;
1437
5dc270d3
ML
1438 if (adev->mman.keep_stolen_vga_memory)
1439 return false;
1440
7f4ce7b5 1441 return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0);
bb0f8429
ML
1442}
1443
5d1eb4c4
ML
1444/*
1445 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
1446 * speed switching. Until we have confirmation from Intel that a specific host
1447 * supports it, it's safer that we keep it disabled for all.
1448 *
1449 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1450 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1451 */
1452bool amdgpu_device_pcie_dynamic_switching_supported(void)
1453{
1454#if IS_ENABLED(CONFIG_X86)
1455 struct cpuinfo_x86 *c = &cpu_data(0);
1456
1457 if (c->x86_vendor == X86_VENDOR_INTEL)
1458 return false;
1459#endif
1460 return true;
1461}
1462
0ab5d711
ML
1463/**
1464 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1465 *
1466 * @adev: amdgpu_device pointer
1467 *
1468 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1469 * be set for this device.
1470 *
1471 * Returns true if it should be used or false if not.
1472 */
1473bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1474{
1475 switch (amdgpu_aspm) {
1476 case -1:
1477 break;
1478 case 0:
1479 return false;
1480 case 1:
1481 return true;
1482 default:
1483 return false;
1484 }
1485 return pcie_aspm_enabled(adev->pdev);
1486}
1487
3ad5dcfe
KHF
1488bool amdgpu_device_aspm_support_quirk(void)
1489{
1490#if IS_ENABLED(CONFIG_X86)
1491 struct cpuinfo_x86 *c = &cpu_data(0);
1492
1493 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1494#else
1495 return true;
1496#endif
1497}
1498
d38ceaf9
AD
1499/* if we get transitioned to only one device, take VGA back */
1500/**
06ec9070 1501 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1502 *
bf44e8ce 1503 * @pdev: PCI device pointer
d38ceaf9
AD
1504 * @state: enable/disable vga decode
1505 *
1506 * Enable/disable vga decode (all asics).
1507 * Returns VGA resource flags.
1508 */
bf44e8ce
CH
1509static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1510 bool state)
d38ceaf9 1511{
bf44e8ce 1512 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1513
d38ceaf9
AD
1514 amdgpu_asic_set_vga_state(adev, state);
1515 if (state)
1516 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1517 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1518 else
1519 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1520}
1521
e3ecdffa
AD
1522/**
1523 * amdgpu_device_check_block_size - validate the vm block size
1524 *
1525 * @adev: amdgpu_device pointer
1526 *
1527 * Validates the vm block size specified via module parameter.
1528 * The vm block size defines number of bits in page table versus page directory,
1529 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1530 * page table and the remaining bits are in the page directory.
1531 */
06ec9070 1532static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1533{
1534 /* defines number of bits in page table versus page directory,
1535 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
b8920e1e
SS
1536 * page table and the remaining bits are in the page directory
1537 */
bab4fee7
JZ
1538 if (amdgpu_vm_block_size == -1)
1539 return;
a1adf8be 1540
bab4fee7 1541 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1542 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1543 amdgpu_vm_block_size);
97489129 1544 amdgpu_vm_block_size = -1;
a1adf8be 1545 }
a1adf8be
CZ
1546}
1547
e3ecdffa
AD
1548/**
1549 * amdgpu_device_check_vm_size - validate the vm size
1550 *
1551 * @adev: amdgpu_device pointer
1552 *
1553 * Validates the vm size in GB specified via module parameter.
1554 * The VM size is the size of the GPU virtual memory space in GB.
1555 */
06ec9070 1556static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1557{
64dab074
AD
1558 /* no need to check the default value */
1559 if (amdgpu_vm_size == -1)
1560 return;
1561
83ca145d
ZJ
1562 if (amdgpu_vm_size < 1) {
1563 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1564 amdgpu_vm_size);
f3368128 1565 amdgpu_vm_size = -1;
83ca145d 1566 }
83ca145d
ZJ
1567}
1568
7951e376
RZ
1569static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1570{
1571 struct sysinfo si;
a9d4fe2f 1572 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1573 uint64_t total_memory;
1574 uint64_t dram_size_seven_GB = 0x1B8000000;
1575 uint64_t dram_size_three_GB = 0xB8000000;
1576
1577 if (amdgpu_smu_memory_pool_size == 0)
1578 return;
1579
1580 if (!is_os_64) {
1581 DRM_WARN("Not 64-bit OS, feature not supported\n");
1582 goto def_value;
1583 }
1584 si_meminfo(&si);
1585 total_memory = (uint64_t)si.totalram * si.mem_unit;
1586
1587 if ((amdgpu_smu_memory_pool_size == 1) ||
1588 (amdgpu_smu_memory_pool_size == 2)) {
1589 if (total_memory < dram_size_three_GB)
1590 goto def_value1;
1591 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1592 (amdgpu_smu_memory_pool_size == 8)) {
1593 if (total_memory < dram_size_seven_GB)
1594 goto def_value1;
1595 } else {
1596 DRM_WARN("Smu memory pool size not supported\n");
1597 goto def_value;
1598 }
1599 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1600
1601 return;
1602
1603def_value1:
1604 DRM_WARN("No enough system memory\n");
1605def_value:
1606 adev->pm.smu_prv_buffer_size = 0;
1607}
1608
9f6a7857
HR
1609static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1610{
1611 if (!(adev->flags & AMD_IS_APU) ||
1612 adev->asic_type < CHIP_RAVEN)
1613 return 0;
1614
1615 switch (adev->asic_type) {
1616 case CHIP_RAVEN:
1617 if (adev->pdev->device == 0x15dd)
1618 adev->apu_flags |= AMD_APU_IS_RAVEN;
1619 if (adev->pdev->device == 0x15d8)
1620 adev->apu_flags |= AMD_APU_IS_PICASSO;
1621 break;
1622 case CHIP_RENOIR:
1623 if ((adev->pdev->device == 0x1636) ||
1624 (adev->pdev->device == 0x164c))
1625 adev->apu_flags |= AMD_APU_IS_RENOIR;
1626 else
1627 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1628 break;
1629 case CHIP_VANGOGH:
1630 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1631 break;
1632 case CHIP_YELLOW_CARP:
1633 break;
d0f56dc2 1634 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1635 if ((adev->pdev->device == 0x13FE) ||
1636 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1637 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1638 break;
9f6a7857 1639 default:
4eaf21b7 1640 break;
9f6a7857
HR
1641 }
1642
1643 return 0;
1644}
1645
d38ceaf9 1646/**
06ec9070 1647 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1648 *
1649 * @adev: amdgpu_device pointer
1650 *
1651 * Validates certain module parameters and updates
1652 * the associated values used by the driver (all asics).
1653 */
912dfc84 1654static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1655{
5b011235
CZ
1656 if (amdgpu_sched_jobs < 4) {
1657 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1658 amdgpu_sched_jobs);
1659 amdgpu_sched_jobs = 4;
47fc644f 1660 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1661 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1662 amdgpu_sched_jobs);
1663 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1664 }
d38ceaf9 1665
83e74db6 1666 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1667 /* gart size must be greater or equal to 32M */
1668 dev_warn(adev->dev, "gart size (%d) too small\n",
1669 amdgpu_gart_size);
83e74db6 1670 amdgpu_gart_size = -1;
d38ceaf9
AD
1671 }
1672
36d38372 1673 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1674 /* gtt size must be greater or equal to 32M */
36d38372
CK
1675 dev_warn(adev->dev, "gtt size (%d) too small\n",
1676 amdgpu_gtt_size);
1677 amdgpu_gtt_size = -1;
d38ceaf9
AD
1678 }
1679
d07f14be
RH
1680 /* valid range is between 4 and 9 inclusive */
1681 if (amdgpu_vm_fragment_size != -1 &&
1682 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1683 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1684 amdgpu_vm_fragment_size = -1;
1685 }
1686
5d5bd5e3
KW
1687 if (amdgpu_sched_hw_submission < 2) {
1688 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1689 amdgpu_sched_hw_submission);
1690 amdgpu_sched_hw_submission = 2;
1691 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1692 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1693 amdgpu_sched_hw_submission);
1694 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1695 }
1696
2656fd23
AG
1697 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1698 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1699 amdgpu_reset_method = -1;
1700 }
1701
7951e376
RZ
1702 amdgpu_device_check_smu_prv_buffer_size(adev);
1703
06ec9070 1704 amdgpu_device_check_vm_size(adev);
d38ceaf9 1705
06ec9070 1706 amdgpu_device_check_block_size(adev);
6a7f76e7 1707
19aede77 1708 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1709
e3c00faa 1710 return 0;
d38ceaf9
AD
1711}
1712
1713/**
1714 * amdgpu_switcheroo_set_state - set switcheroo state
1715 *
1716 * @pdev: pci dev pointer
1694467b 1717 * @state: vga_switcheroo state
d38ceaf9 1718 *
12024b17 1719 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1720 * the asics before or after it is powered up using ACPI methods.
1721 */
8aba21b7
LT
1722static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1723 enum vga_switcheroo_state state)
d38ceaf9
AD
1724{
1725 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1726 int r;
d38ceaf9 1727
b98c6299 1728 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1729 return;
1730
1731 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1732 pr_info("switched on\n");
d38ceaf9
AD
1733 /* don't suspend or resume card normally */
1734 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1735
8f66090b
TZ
1736 pci_set_power_state(pdev, PCI_D0);
1737 amdgpu_device_load_pci_state(pdev);
1738 r = pci_enable_device(pdev);
de185019
AD
1739 if (r)
1740 DRM_WARN("pci_enable_device failed (%d)\n", r);
1741 amdgpu_device_resume(dev, true);
d38ceaf9 1742
d38ceaf9 1743 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1744 } else {
dd4fa6c1 1745 pr_info("switched off\n");
d38ceaf9 1746 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1747 amdgpu_device_suspend(dev, true);
8f66090b 1748 amdgpu_device_cache_pci_state(pdev);
de185019 1749 /* Shut down the device */
8f66090b
TZ
1750 pci_disable_device(pdev);
1751 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1752 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1753 }
1754}
1755
1756/**
1757 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1758 *
1759 * @pdev: pci dev pointer
1760 *
1761 * Callback for the switcheroo driver. Check of the switcheroo
1762 * state can be changed.
1763 * Returns true if the state can be changed, false if not.
1764 */
1765static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1766{
1767 struct drm_device *dev = pci_get_drvdata(pdev);
1768
b8920e1e 1769 /*
d38ceaf9
AD
1770 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1771 * locking inversion with the driver load path. And the access here is
1772 * completely racy anyway. So don't bother with locking for now.
1773 */
7e13ad89 1774 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1775}
1776
1777static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1778 .set_gpu_state = amdgpu_switcheroo_set_state,
1779 .reprobe = NULL,
1780 .can_switch = amdgpu_switcheroo_can_switch,
1781};
1782
e3ecdffa
AD
1783/**
1784 * amdgpu_device_ip_set_clockgating_state - set the CG state
1785 *
87e3f136 1786 * @dev: amdgpu_device pointer
e3ecdffa
AD
1787 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1788 * @state: clockgating state (gate or ungate)
1789 *
1790 * Sets the requested clockgating state for all instances of
1791 * the hardware IP specified.
1792 * Returns the error code from the last instance.
1793 */
43fa561f 1794int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1795 enum amd_ip_block_type block_type,
1796 enum amd_clockgating_state state)
d38ceaf9 1797{
43fa561f 1798 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1799 int i, r = 0;
1800
1801 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1802 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1803 continue;
c722865a
RZ
1804 if (adev->ip_blocks[i].version->type != block_type)
1805 continue;
1806 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1807 continue;
1808 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1809 (void *)adev, state);
1810 if (r)
1811 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1812 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1813 }
1814 return r;
1815}
1816
e3ecdffa
AD
1817/**
1818 * amdgpu_device_ip_set_powergating_state - set the PG state
1819 *
87e3f136 1820 * @dev: amdgpu_device pointer
e3ecdffa
AD
1821 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1822 * @state: powergating state (gate or ungate)
1823 *
1824 * Sets the requested powergating state for all instances of
1825 * the hardware IP specified.
1826 * Returns the error code from the last instance.
1827 */
43fa561f 1828int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1829 enum amd_ip_block_type block_type,
1830 enum amd_powergating_state state)
d38ceaf9 1831{
43fa561f 1832 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1833 int i, r = 0;
1834
1835 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1836 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1837 continue;
c722865a
RZ
1838 if (adev->ip_blocks[i].version->type != block_type)
1839 continue;
1840 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1841 continue;
1842 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1843 (void *)adev, state);
1844 if (r)
1845 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1846 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1847 }
1848 return r;
1849}
1850
e3ecdffa
AD
1851/**
1852 * amdgpu_device_ip_get_clockgating_state - get the CG state
1853 *
1854 * @adev: amdgpu_device pointer
1855 * @flags: clockgating feature flags
1856 *
1857 * Walks the list of IPs on the device and updates the clockgating
1858 * flags for each IP.
1859 * Updates @flags with the feature flags for each hardware IP where
1860 * clockgating is enabled.
1861 */
2990a1fc 1862void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1863 u64 *flags)
6cb2d4e4
HR
1864{
1865 int i;
1866
1867 for (i = 0; i < adev->num_ip_blocks; i++) {
1868 if (!adev->ip_blocks[i].status.valid)
1869 continue;
1870 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1871 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1872 }
1873}
1874
e3ecdffa
AD
1875/**
1876 * amdgpu_device_ip_wait_for_idle - wait for idle
1877 *
1878 * @adev: amdgpu_device pointer
1879 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1880 *
1881 * Waits for the request hardware IP to be idle.
1882 * Returns 0 for success or a negative error code on failure.
1883 */
2990a1fc
AD
1884int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1885 enum amd_ip_block_type block_type)
5dbbb60b
AD
1886{
1887 int i, r;
1888
1889 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1890 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1891 continue;
a1255107
AD
1892 if (adev->ip_blocks[i].version->type == block_type) {
1893 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1894 if (r)
1895 return r;
1896 break;
1897 }
1898 }
1899 return 0;
1900
1901}
1902
e3ecdffa
AD
1903/**
1904 * amdgpu_device_ip_is_idle - is the hardware IP idle
1905 *
1906 * @adev: amdgpu_device pointer
1907 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1908 *
1909 * Check if the hardware IP is idle or not.
1910 * Returns true if it the IP is idle, false if not.
1911 */
2990a1fc
AD
1912bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1913 enum amd_ip_block_type block_type)
5dbbb60b
AD
1914{
1915 int i;
1916
1917 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1918 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1919 continue;
a1255107
AD
1920 if (adev->ip_blocks[i].version->type == block_type)
1921 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1922 }
1923 return true;
1924
1925}
1926
e3ecdffa
AD
1927/**
1928 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1929 *
1930 * @adev: amdgpu_device pointer
87e3f136 1931 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1932 *
1933 * Returns a pointer to the hardware IP block structure
1934 * if it exists for the asic, otherwise NULL.
1935 */
2990a1fc
AD
1936struct amdgpu_ip_block *
1937amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1938 enum amd_ip_block_type type)
d38ceaf9
AD
1939{
1940 int i;
1941
1942 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1943 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1944 return &adev->ip_blocks[i];
1945
1946 return NULL;
1947}
1948
1949/**
2990a1fc 1950 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1951 *
1952 * @adev: amdgpu_device pointer
5fc3aeeb 1953 * @type: enum amd_ip_block_type
d38ceaf9
AD
1954 * @major: major version
1955 * @minor: minor version
1956 *
1957 * return 0 if equal or greater
1958 * return 1 if smaller or the ip_block doesn't exist
1959 */
2990a1fc
AD
1960int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1961 enum amd_ip_block_type type,
1962 u32 major, u32 minor)
d38ceaf9 1963{
2990a1fc 1964 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1965
a1255107
AD
1966 if (ip_block && ((ip_block->version->major > major) ||
1967 ((ip_block->version->major == major) &&
1968 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1969 return 0;
1970
1971 return 1;
1972}
1973
a1255107 1974/**
2990a1fc 1975 * amdgpu_device_ip_block_add
a1255107
AD
1976 *
1977 * @adev: amdgpu_device pointer
1978 * @ip_block_version: pointer to the IP to add
1979 *
1980 * Adds the IP block driver information to the collection of IPs
1981 * on the asic.
1982 */
2990a1fc
AD
1983int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1984 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1985{
1986 if (!ip_block_version)
1987 return -EINVAL;
1988
7bd939d0
LG
1989 switch (ip_block_version->type) {
1990 case AMD_IP_BLOCK_TYPE_VCN:
1991 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1992 return 0;
1993 break;
1994 case AMD_IP_BLOCK_TYPE_JPEG:
1995 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1996 return 0;
1997 break;
1998 default:
1999 break;
2000 }
2001
e966a725 2002 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
2003 ip_block_version->funcs->name);
2004
a1255107
AD
2005 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2006
2007 return 0;
2008}
2009
e3ecdffa
AD
2010/**
2011 * amdgpu_device_enable_virtual_display - enable virtual display feature
2012 *
2013 * @adev: amdgpu_device pointer
2014 *
2015 * Enabled the virtual display feature if the user has enabled it via
2016 * the module parameter virtual_display. This feature provides a virtual
2017 * display hardware on headless boards or in virtualized environments.
2018 * This function parses and validates the configuration string specified by
2019 * the user and configues the virtual display configuration (number of
2020 * virtual connectors, crtcs, etc.) specified.
2021 */
483ef985 2022static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
2023{
2024 adev->enable_virtual_display = false;
2025
2026 if (amdgpu_virtual_display) {
8f66090b 2027 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 2028 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
2029
2030 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2031 pciaddstr_tmp = pciaddstr;
0f66356d
ED
2032 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2033 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
2034 if (!strcmp("all", pciaddname)
2035 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
2036 long num_crtc;
2037 int res = -1;
2038
9accf2fd 2039 adev->enable_virtual_display = true;
0f66356d
ED
2040
2041 if (pciaddname_tmp)
2042 res = kstrtol(pciaddname_tmp, 10,
2043 &num_crtc);
2044
2045 if (!res) {
2046 if (num_crtc < 1)
2047 num_crtc = 1;
2048 if (num_crtc > 6)
2049 num_crtc = 6;
2050 adev->mode_info.num_crtc = num_crtc;
2051 } else {
2052 adev->mode_info.num_crtc = 1;
2053 }
9accf2fd
ED
2054 break;
2055 }
2056 }
2057
0f66356d
ED
2058 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2059 amdgpu_virtual_display, pci_address_name,
2060 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
2061
2062 kfree(pciaddstr);
2063 }
2064}
2065
25263da3
AD
2066void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2067{
2068 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2069 adev->mode_info.num_crtc = 1;
2070 adev->enable_virtual_display = true;
2071 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2072 adev->enable_virtual_display, adev->mode_info.num_crtc);
2073 }
2074}
2075
e3ecdffa
AD
2076/**
2077 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2078 *
2079 * @adev: amdgpu_device pointer
2080 *
2081 * Parses the asic configuration parameters specified in the gpu info
2082 * firmware and makes them availale to the driver for use in configuring
2083 * the asic.
2084 * Returns 0 on success, -EINVAL on failure.
2085 */
e2a75f88
AD
2086static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2087{
e2a75f88 2088 const char *chip_name;
c0a43457 2089 char fw_name[40];
e2a75f88
AD
2090 int err;
2091 const struct gpu_info_firmware_header_v1_0 *hdr;
2092
ab4fe3e1
HR
2093 adev->firmware.gpu_info_fw = NULL;
2094
72de33f8 2095 if (adev->mman.discovery_bin) {
cc375d8c
TY
2096 /*
2097 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 2098 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
2099 * when DAL no longer needs it.
2100 */
2101 if (adev->asic_type != CHIP_NAVI12)
2102 return 0;
258620d0
AD
2103 }
2104
e2a75f88 2105 switch (adev->asic_type) {
e2a75f88
AD
2106 default:
2107 return 0;
2108 case CHIP_VEGA10:
2109 chip_name = "vega10";
2110 break;
3f76dced
AD
2111 case CHIP_VEGA12:
2112 chip_name = "vega12";
2113 break;
2d2e5e7e 2114 case CHIP_RAVEN:
54f78a76 2115 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2116 chip_name = "raven2";
54f78a76 2117 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2118 chip_name = "picasso";
54c4d17e
FX
2119 else
2120 chip_name = "raven";
2d2e5e7e 2121 break;
65e60f6e
LM
2122 case CHIP_ARCTURUS:
2123 chip_name = "arcturus";
2124 break;
42b325e5
XY
2125 case CHIP_NAVI12:
2126 chip_name = "navi12";
2127 break;
e2a75f88
AD
2128 }
2129
2130 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2131 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2132 if (err) {
2133 dev_err(adev->dev,
b31d3063 2134 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2135 fw_name);
2136 goto out;
2137 }
2138
ab4fe3e1 2139 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2140 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2141
2142 switch (hdr->version_major) {
2143 case 1:
2144 {
2145 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2146 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2147 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2148
cc375d8c
TY
2149 /*
2150 * Should be droped when DAL no longer needs it.
2151 */
2152 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2153 goto parse_soc_bounding_box;
2154
b5ab16bf
AD
2155 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2156 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2157 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2158 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2159 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2160 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2161 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2162 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2163 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2164 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2165 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2166 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2167 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2168 adev->gfx.cu_info.max_waves_per_simd =
2169 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2170 adev->gfx.cu_info.max_scratch_slots_per_cu =
2171 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2172 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2173 if (hdr->version_minor >= 1) {
35c2e910
HZ
2174 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2175 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2176 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2177 adev->gfx.config.num_sc_per_sh =
2178 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2179 adev->gfx.config.num_packer_per_sc =
2180 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2181 }
ec51d3fa
XY
2182
2183parse_soc_bounding_box:
ec51d3fa
XY
2184 /*
2185 * soc bounding box info is not integrated in disocovery table,
258620d0 2186 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 2187 */
48321c3d
HW
2188 if (hdr->version_minor == 2) {
2189 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2190 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2191 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2192 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2193 }
e2a75f88
AD
2194 break;
2195 }
2196 default:
2197 dev_err(adev->dev,
2198 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2199 err = -EINVAL;
2200 goto out;
2201 }
2202out:
e2a75f88
AD
2203 return err;
2204}
2205
e3ecdffa
AD
2206/**
2207 * amdgpu_device_ip_early_init - run early init for hardware IPs
2208 *
2209 * @adev: amdgpu_device pointer
2210 *
2211 * Early initialization pass for hardware IPs. The hardware IPs that make
2212 * up each asic are discovered each IP's early_init callback is run. This
2213 * is the first stage in initializing the asic.
2214 * Returns 0 on success, negative error code on failure.
2215 */
06ec9070 2216static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2217{
901e2be2
AD
2218 struct drm_device *dev = adev_to_drm(adev);
2219 struct pci_dev *parent;
aaa36a97 2220 int i, r;
ced69502 2221 bool total;
d38ceaf9 2222
483ef985 2223 amdgpu_device_enable_virtual_display(adev);
a6be7570 2224
00a979f3 2225 if (amdgpu_sriov_vf(adev)) {
00a979f3 2226 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2227 if (r)
2228 return r;
00a979f3
WS
2229 }
2230
d38ceaf9 2231 switch (adev->asic_type) {
33f34802
KW
2232#ifdef CONFIG_DRM_AMDGPU_SI
2233 case CHIP_VERDE:
2234 case CHIP_TAHITI:
2235 case CHIP_PITCAIRN:
2236 case CHIP_OLAND:
2237 case CHIP_HAINAN:
295d0daf 2238 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2239 r = si_set_ip_blocks(adev);
2240 if (r)
2241 return r;
2242 break;
2243#endif
a2e73f56
AD
2244#ifdef CONFIG_DRM_AMDGPU_CIK
2245 case CHIP_BONAIRE:
2246 case CHIP_HAWAII:
2247 case CHIP_KAVERI:
2248 case CHIP_KABINI:
2249 case CHIP_MULLINS:
e1ad2d53 2250 if (adev->flags & AMD_IS_APU)
a2e73f56 2251 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2252 else
2253 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2254
2255 r = cik_set_ip_blocks(adev);
2256 if (r)
2257 return r;
2258 break;
2259#endif
da87c30b
AD
2260 case CHIP_TOPAZ:
2261 case CHIP_TONGA:
2262 case CHIP_FIJI:
2263 case CHIP_POLARIS10:
2264 case CHIP_POLARIS11:
2265 case CHIP_POLARIS12:
2266 case CHIP_VEGAM:
2267 case CHIP_CARRIZO:
2268 case CHIP_STONEY:
2269 if (adev->flags & AMD_IS_APU)
2270 adev->family = AMDGPU_FAMILY_CZ;
2271 else
2272 adev->family = AMDGPU_FAMILY_VI;
2273
2274 r = vi_set_ip_blocks(adev);
2275 if (r)
2276 return r;
2277 break;
d38ceaf9 2278 default:
63352b7f
AD
2279 r = amdgpu_discovery_set_ip_blocks(adev);
2280 if (r)
2281 return r;
2282 break;
d38ceaf9
AD
2283 }
2284
901e2be2
AD
2285 if (amdgpu_has_atpx() &&
2286 (amdgpu_is_atpx_hybrid() ||
2287 amdgpu_has_atpx_dgpu_power_cntl()) &&
2288 ((adev->flags & AMD_IS_APU) == 0) &&
2289 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2290 adev->flags |= AMD_IS_PX;
2291
85ac2021 2292 if (!(adev->flags & AMD_IS_APU)) {
c4c8955b 2293 parent = pcie_find_root_port(adev->pdev);
85ac2021
AD
2294 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2295 }
901e2be2 2296
1884734a 2297
3b94fb10 2298 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2299 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2300 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2301 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2302 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2303
ced69502 2304 total = true;
d38ceaf9
AD
2305 for (i = 0; i < adev->num_ip_blocks; i++) {
2306 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2307 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2308 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2309 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2310 } else {
a1255107
AD
2311 if (adev->ip_blocks[i].version->funcs->early_init) {
2312 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2313 if (r == -ENOENT) {
a1255107 2314 adev->ip_blocks[i].status.valid = false;
2c1a2784 2315 } else if (r) {
a1255107
AD
2316 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2317 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2318 total = false;
2c1a2784 2319 } else {
a1255107 2320 adev->ip_blocks[i].status.valid = true;
2c1a2784 2321 }
974e6b64 2322 } else {
a1255107 2323 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2324 }
d38ceaf9 2325 }
21a249ca
AD
2326 /* get the vbios after the asic_funcs are set up */
2327 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2328 r = amdgpu_device_parse_gpu_info_fw(adev);
2329 if (r)
2330 return r;
2331
21a249ca 2332 /* Read BIOS */
9535a86a
SZ
2333 if (amdgpu_device_read_bios(adev)) {
2334 if (!amdgpu_get_bios(adev))
2335 return -EINVAL;
21a249ca 2336
9535a86a
SZ
2337 r = amdgpu_atombios_init(adev);
2338 if (r) {
2339 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2340 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2341 return r;
2342 }
21a249ca 2343 }
77eabc6f
PJZ
2344
2345 /*get pf2vf msg info at it's earliest time*/
2346 if (amdgpu_sriov_vf(adev))
2347 amdgpu_virt_init_data_exchange(adev);
2348
21a249ca 2349 }
d38ceaf9 2350 }
ced69502
ML
2351 if (!total)
2352 return -ENODEV;
d38ceaf9 2353
00fa4035 2354 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2355 adev->cg_flags &= amdgpu_cg_mask;
2356 adev->pg_flags &= amdgpu_pg_mask;
2357
d38ceaf9
AD
2358 return 0;
2359}
2360
0a4f2520
RZ
2361static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2362{
2363 int i, r;
2364
2365 for (i = 0; i < adev->num_ip_blocks; i++) {
2366 if (!adev->ip_blocks[i].status.sw)
2367 continue;
2368 if (adev->ip_blocks[i].status.hw)
2369 continue;
2370 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2371 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2372 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2373 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2374 if (r) {
2375 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2376 adev->ip_blocks[i].version->funcs->name, r);
2377 return r;
2378 }
2379 adev->ip_blocks[i].status.hw = true;
2380 }
2381 }
2382
2383 return 0;
2384}
2385
2386static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2387{
2388 int i, r;
2389
2390 for (i = 0; i < adev->num_ip_blocks; i++) {
2391 if (!adev->ip_blocks[i].status.sw)
2392 continue;
2393 if (adev->ip_blocks[i].status.hw)
2394 continue;
2395 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2396 if (r) {
2397 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2398 adev->ip_blocks[i].version->funcs->name, r);
2399 return r;
2400 }
2401 adev->ip_blocks[i].status.hw = true;
2402 }
2403
2404 return 0;
2405}
2406
7a3e0bb2
RZ
2407static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2408{
2409 int r = 0;
2410 int i;
80f41f84 2411 uint32_t smu_version;
7a3e0bb2
RZ
2412
2413 if (adev->asic_type >= CHIP_VEGA10) {
2414 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2415 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2416 continue;
2417
e3c1b071 2418 if (!adev->ip_blocks[i].status.sw)
2419 continue;
2420
482f0e53
ML
2421 /* no need to do the fw loading again if already done*/
2422 if (adev->ip_blocks[i].status.hw == true)
2423 break;
2424
53b3f8f4 2425 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2426 r = adev->ip_blocks[i].version->funcs->resume(adev);
2427 if (r) {
2428 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2429 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2430 return r;
2431 }
2432 } else {
2433 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2434 if (r) {
2435 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2436 adev->ip_blocks[i].version->funcs->name, r);
2437 return r;
7a3e0bb2 2438 }
7a3e0bb2 2439 }
482f0e53
ML
2440
2441 adev->ip_blocks[i].status.hw = true;
2442 break;
7a3e0bb2
RZ
2443 }
2444 }
482f0e53 2445
8973d9ec
ED
2446 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2447 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2448
80f41f84 2449 return r;
7a3e0bb2
RZ
2450}
2451
5fd8518d
AG
2452static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2453{
2454 long timeout;
2455 int r, i;
2456
2457 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2458 struct amdgpu_ring *ring = adev->rings[i];
2459
2460 /* No need to setup the GPU scheduler for rings that don't need it */
2461 if (!ring || ring->no_scheduler)
2462 continue;
2463
2464 switch (ring->funcs->type) {
2465 case AMDGPU_RING_TYPE_GFX:
2466 timeout = adev->gfx_timeout;
2467 break;
2468 case AMDGPU_RING_TYPE_COMPUTE:
2469 timeout = adev->compute_timeout;
2470 break;
2471 case AMDGPU_RING_TYPE_SDMA:
2472 timeout = adev->sdma_timeout;
2473 break;
2474 default:
2475 timeout = adev->video_timeout;
2476 break;
2477 }
2478
2479 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
11f25c84 2480 ring->num_hw_submission, 0,
8ab62eda
JG
2481 timeout, adev->reset_domain->wq,
2482 ring->sched_score, ring->name,
2483 adev->dev);
5fd8518d
AG
2484 if (r) {
2485 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2486 ring->name);
2487 return r;
2488 }
2489 }
2490
d425c6f4
JZ
2491 amdgpu_xcp_update_partition_sched_list(adev);
2492
5fd8518d
AG
2493 return 0;
2494}
2495
2496
e3ecdffa
AD
2497/**
2498 * amdgpu_device_ip_init - run init for hardware IPs
2499 *
2500 * @adev: amdgpu_device pointer
2501 *
2502 * Main initialization pass for hardware IPs. The list of all the hardware
2503 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2504 * are run. sw_init initializes the software state associated with each IP
2505 * and hw_init initializes the hardware associated with each IP.
2506 * Returns 0 on success, negative error code on failure.
2507 */
06ec9070 2508static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2509{
2510 int i, r;
2511
c030f2e4 2512 r = amdgpu_ras_init(adev);
2513 if (r)
2514 return r;
2515
d38ceaf9 2516 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2517 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2518 continue;
a1255107 2519 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2520 if (r) {
a1255107
AD
2521 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2522 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2523 goto init_failed;
2c1a2784 2524 }
a1255107 2525 adev->ip_blocks[i].status.sw = true;
bfca0289 2526
c1c39032
AD
2527 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2528 /* need to do common hw init early so everything is set up for gmc */
2529 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2530 if (r) {
2531 DRM_ERROR("hw_init %d failed %d\n", i, r);
2532 goto init_failed;
2533 }
2534 adev->ip_blocks[i].status.hw = true;
2535 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2536 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2537 /* Try to reserve bad pages early */
2538 if (amdgpu_sriov_vf(adev))
2539 amdgpu_virt_exchange_data(adev);
2540
7ccfd79f 2541 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2542 if (r) {
7ccfd79f 2543 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2544 goto init_failed;
2c1a2784 2545 }
a1255107 2546 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2547 if (r) {
2548 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2549 goto init_failed;
2c1a2784 2550 }
06ec9070 2551 r = amdgpu_device_wb_init(adev);
2c1a2784 2552 if (r) {
06ec9070 2553 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2554 goto init_failed;
2c1a2784 2555 }
a1255107 2556 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2557
2558 /* right after GMC hw init, we create CSA */
02ff519e 2559 if (adev->gfx.mcbp) {
1e256e27 2560 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2561 AMDGPU_GEM_DOMAIN_VRAM |
2562 AMDGPU_GEM_DOMAIN_GTT,
2563 AMDGPU_CSA_SIZE);
2493664f
ML
2564 if (r) {
2565 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2566 goto init_failed;
2493664f
ML
2567 }
2568 }
d38ceaf9
AD
2569 }
2570 }
2571
c9ffa427 2572 if (amdgpu_sriov_vf(adev))
22c16d25 2573 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2574
533aed27
AG
2575 r = amdgpu_ib_pool_init(adev);
2576 if (r) {
2577 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2578 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2579 goto init_failed;
2580 }
2581
c8963ea4
RZ
2582 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2583 if (r)
72d3f592 2584 goto init_failed;
0a4f2520
RZ
2585
2586 r = amdgpu_device_ip_hw_init_phase1(adev);
2587 if (r)
72d3f592 2588 goto init_failed;
0a4f2520 2589
7a3e0bb2
RZ
2590 r = amdgpu_device_fw_loading(adev);
2591 if (r)
72d3f592 2592 goto init_failed;
7a3e0bb2 2593
0a4f2520
RZ
2594 r = amdgpu_device_ip_hw_init_phase2(adev);
2595 if (r)
72d3f592 2596 goto init_failed;
d38ceaf9 2597
121a2bc6
AG
2598 /*
2599 * retired pages will be loaded from eeprom and reserved here,
2600 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2601 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2602 * for I2C communication which only true at this point.
b82e65a9
GC
2603 *
2604 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2605 * failure from bad gpu situation and stop amdgpu init process
2606 * accordingly. For other failed cases, it will still release all
2607 * the resource and print error message, rather than returning one
2608 * negative value to upper level.
121a2bc6
AG
2609 *
2610 * Note: theoretically, this should be called before all vram allocations
2611 * to protect retired page from abusing
2612 */
b82e65a9
GC
2613 r = amdgpu_ras_recovery_init(adev);
2614 if (r)
2615 goto init_failed;
121a2bc6 2616
cfbb6b00
AG
2617 /**
2618 * In case of XGMI grab extra reference for reset domain for this device
2619 */
a4c63caf 2620 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2621 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2622 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2623 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2624
dfd0287b
LH
2625 if (WARN_ON(!hive)) {
2626 r = -ENOENT;
2627 goto init_failed;
2628 }
2629
46c67660 2630 if (!hive->reset_domain ||
2631 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2632 r = -ENOENT;
2633 amdgpu_put_xgmi_hive(hive);
2634 goto init_failed;
2635 }
2636
2637 /* Drop the early temporary reset domain we created for device */
2638 amdgpu_reset_put_reset_domain(adev->reset_domain);
2639 adev->reset_domain = hive->reset_domain;
9dfa4860 2640 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2641 }
a4c63caf
AG
2642 }
2643 }
2644
5fd8518d
AG
2645 r = amdgpu_device_init_schedulers(adev);
2646 if (r)
2647 goto init_failed;
e3c1b071 2648
2649 /* Don't init kfd if whole hive need to be reset during init */
84b4dd3f
PY
2650 if (!adev->gmc.xgmi.pending_reset) {
2651 kgd2kfd_init_zone_device(adev);
e3c1b071 2652 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2653 }
c6332b97 2654
bd607166
KR
2655 amdgpu_fru_get_product_info(adev);
2656
72d3f592 2657init_failed:
c6332b97 2658
72d3f592 2659 return r;
d38ceaf9
AD
2660}
2661
e3ecdffa
AD
2662/**
2663 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2664 *
2665 * @adev: amdgpu_device pointer
2666 *
2667 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2668 * this function before a GPU reset. If the value is retained after a
2669 * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents.
2670 */
06ec9070 2671static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2672{
2673 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2674}
2675
e3ecdffa
AD
2676/**
2677 * amdgpu_device_check_vram_lost - check if vram is valid
2678 *
2679 * @adev: amdgpu_device pointer
2680 *
2681 * Checks the reset magic value written to the gart pointer in VRAM.
2682 * The driver calls this after a GPU reset to see if the contents of
2683 * VRAM is lost or now.
2684 * returns true if vram is lost, false if not.
2685 */
06ec9070 2686static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2687{
dadce777
EQ
2688 if (memcmp(adev->gart.ptr, adev->reset_magic,
2689 AMDGPU_RESET_MAGIC_NUM))
2690 return true;
2691
53b3f8f4 2692 if (!amdgpu_in_reset(adev))
dadce777
EQ
2693 return false;
2694
2695 /*
2696 * For all ASICs with baco/mode1 reset, the VRAM is
2697 * always assumed to be lost.
2698 */
2699 switch (amdgpu_asic_reset_method(adev)) {
2700 case AMD_RESET_METHOD_BACO:
2701 case AMD_RESET_METHOD_MODE1:
2702 return true;
2703 default:
2704 return false;
2705 }
0c49e0b8
CZ
2706}
2707
e3ecdffa 2708/**
1112a46b 2709 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2710 *
2711 * @adev: amdgpu_device pointer
b8b72130 2712 * @state: clockgating state (gate or ungate)
e3ecdffa 2713 *
e3ecdffa 2714 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2715 * set_clockgating_state callbacks are run.
2716 * Late initialization pass enabling clockgating for hardware IPs.
2717 * Fini or suspend, pass disabling clockgating for hardware IPs.
e3ecdffa
AD
2718 * Returns 0 on success, negative error code on failure.
2719 */
fdd34271 2720
5d89bb2d
LL
2721int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2722 enum amd_clockgating_state state)
d38ceaf9 2723{
1112a46b 2724 int i, j, r;
d38ceaf9 2725
4a2ba394
SL
2726 if (amdgpu_emu_mode == 1)
2727 return 0;
2728
1112a46b
RZ
2729 for (j = 0; j < adev->num_ip_blocks; j++) {
2730 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2731 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2732 continue;
47198eb7 2733 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2734 if (adev->in_s0ix &&
47198eb7
AD
2735 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2736 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2737 continue;
4a446d55 2738 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2739 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2740 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2741 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2742 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2743 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2744 /* enable clockgating to save power */
a1255107 2745 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2746 state);
4a446d55
AD
2747 if (r) {
2748 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2749 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2750 return r;
2751 }
b0b00ff1 2752 }
d38ceaf9 2753 }
06b18f61 2754
c9f96fd5
RZ
2755 return 0;
2756}
2757
5d89bb2d
LL
2758int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2759 enum amd_powergating_state state)
c9f96fd5 2760{
1112a46b 2761 int i, j, r;
06b18f61 2762
c9f96fd5
RZ
2763 if (amdgpu_emu_mode == 1)
2764 return 0;
2765
1112a46b
RZ
2766 for (j = 0; j < adev->num_ip_blocks; j++) {
2767 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2768 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2769 continue;
47198eb7 2770 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2771 if (adev->in_s0ix &&
47198eb7
AD
2772 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2773 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2774 continue;
c9f96fd5
RZ
2775 /* skip CG for VCE/UVD, it's handled specially */
2776 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2777 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2778 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2779 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2780 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2781 /* enable powergating to save power */
2782 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2783 state);
c9f96fd5
RZ
2784 if (r) {
2785 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2786 adev->ip_blocks[i].version->funcs->name, r);
2787 return r;
2788 }
2789 }
2790 }
2dc80b00
S
2791 return 0;
2792}
2793
beff74bc
AD
2794static int amdgpu_device_enable_mgpu_fan_boost(void)
2795{
2796 struct amdgpu_gpu_instance *gpu_ins;
2797 struct amdgpu_device *adev;
2798 int i, ret = 0;
2799
2800 mutex_lock(&mgpu_info.mutex);
2801
2802 /*
2803 * MGPU fan boost feature should be enabled
2804 * only when there are two or more dGPUs in
2805 * the system
2806 */
2807 if (mgpu_info.num_dgpu < 2)
2808 goto out;
2809
2810 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2811 gpu_ins = &(mgpu_info.gpu_ins[i]);
2812 adev = gpu_ins->adev;
2813 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2814 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2815 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2816 if (ret)
2817 break;
2818
2819 gpu_ins->mgpu_fan_enabled = 1;
2820 }
2821 }
2822
2823out:
2824 mutex_unlock(&mgpu_info.mutex);
2825
2826 return ret;
2827}
2828
e3ecdffa
AD
2829/**
2830 * amdgpu_device_ip_late_init - run late init for hardware IPs
2831 *
2832 * @adev: amdgpu_device pointer
2833 *
2834 * Late initialization pass for hardware IPs. The list of all the hardware
2835 * IPs that make up the asic is walked and the late_init callbacks are run.
2836 * late_init covers any special initialization that an IP requires
2837 * after all of the have been initialized or something that needs to happen
2838 * late in the init process.
2839 * Returns 0 on success, negative error code on failure.
2840 */
06ec9070 2841static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2842{
60599a03 2843 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2844 int i = 0, r;
2845
2846 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2847 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2848 continue;
2849 if (adev->ip_blocks[i].version->funcs->late_init) {
2850 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2851 if (r) {
2852 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2853 adev->ip_blocks[i].version->funcs->name, r);
2854 return r;
2855 }
2dc80b00 2856 }
73f847db 2857 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2858 }
2859
867e24ca 2860 r = amdgpu_ras_late_init(adev);
2861 if (r) {
2862 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2863 return r;
2864 }
2865
a891d239
DL
2866 amdgpu_ras_set_error_query_ready(adev, true);
2867
1112a46b
RZ
2868 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2869 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2870
06ec9070 2871 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2872
beff74bc
AD
2873 r = amdgpu_device_enable_mgpu_fan_boost();
2874 if (r)
2875 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2876
4da8b639 2877 /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
47fc644f
SS
2878 if (amdgpu_passthrough(adev) &&
2879 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2880 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2881 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2882
2883 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2884 mutex_lock(&mgpu_info.mutex);
2885
2886 /*
2887 * Reset device p-state to low as this was booted with high.
2888 *
2889 * This should be performed only after all devices from the same
2890 * hive get initialized.
2891 *
2892 * However, it's unknown how many device in the hive in advance.
2893 * As this is counted one by one during devices initializations.
2894 *
2895 * So, we wait for all XGMI interlinked devices initialized.
2896 * This may bring some delays as those devices may come from
2897 * different hives. But that should be OK.
2898 */
2899 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2900 for (i = 0; i < mgpu_info.num_gpu; i++) {
2901 gpu_instance = &(mgpu_info.gpu_ins[i]);
2902 if (gpu_instance->adev->flags & AMD_IS_APU)
2903 continue;
2904
d84a430d
JK
2905 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2906 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2907 if (r) {
2908 DRM_ERROR("pstate setting failed (%d).\n", r);
2909 break;
2910 }
2911 }
2912 }
2913
2914 mutex_unlock(&mgpu_info.mutex);
2915 }
2916
d38ceaf9
AD
2917 return 0;
2918}
2919
613aa3ea
LY
2920/**
2921 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2922 *
2923 * @adev: amdgpu_device pointer
2924 *
2925 * For ASICs need to disable SMC first
2926 */
2927static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2928{
2929 int i, r;
2930
4e8303cf 2931 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
613aa3ea
LY
2932 return;
2933
2934 for (i = 0; i < adev->num_ip_blocks; i++) {
2935 if (!adev->ip_blocks[i].status.hw)
2936 continue;
2937 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2938 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2939 /* XXX handle errors */
2940 if (r) {
2941 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2942 adev->ip_blocks[i].version->funcs->name, r);
2943 }
2944 adev->ip_blocks[i].status.hw = false;
2945 break;
2946 }
2947 }
2948}
2949
e9669fb7 2950static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2951{
2952 int i, r;
2953
e9669fb7
AG
2954 for (i = 0; i < adev->num_ip_blocks; i++) {
2955 if (!adev->ip_blocks[i].version->funcs->early_fini)
2956 continue;
5278a159 2957
e9669fb7
AG
2958 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2959 if (r) {
2960 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2961 adev->ip_blocks[i].version->funcs->name, r);
2962 }
2963 }
c030f2e4 2964
05df1f01 2965 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2966 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2967
7270e895
TY
2968 amdgpu_amdkfd_suspend(adev, false);
2969
613aa3ea
LY
2970 /* Workaroud for ASICs need to disable SMC first */
2971 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2972
d38ceaf9 2973 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2974 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2975 continue;
8201a67a 2976
a1255107 2977 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2978 /* XXX handle errors */
2c1a2784 2979 if (r) {
a1255107
AD
2980 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2981 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2982 }
8201a67a 2983
a1255107 2984 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2985 }
2986
6effad8a
GC
2987 if (amdgpu_sriov_vf(adev)) {
2988 if (amdgpu_virt_release_full_gpu(adev, false))
2989 DRM_ERROR("failed to release exclusive mode on fini\n");
2990 }
2991
e9669fb7
AG
2992 return 0;
2993}
2994
2995/**
2996 * amdgpu_device_ip_fini - run fini for hardware IPs
2997 *
2998 * @adev: amdgpu_device pointer
2999 *
3000 * Main teardown pass for hardware IPs. The list of all the hardware
3001 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3002 * are run. hw_fini tears down the hardware associated with each IP
3003 * and sw_fini tears down any software state associated with each IP.
3004 * Returns 0 on success, negative error code on failure.
3005 */
3006static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3007{
3008 int i, r;
3009
3010 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3011 amdgpu_virt_release_ras_err_handler_data(adev);
3012
e9669fb7
AG
3013 if (adev->gmc.xgmi.num_physical_nodes > 1)
3014 amdgpu_xgmi_remove_device(adev);
3015
c004d44e 3016 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 3017
d38ceaf9 3018 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3019 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 3020 continue;
c12aba3a
ML
3021
3022 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 3023 amdgpu_ucode_free_bo(adev);
1e256e27 3024 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 3025 amdgpu_device_wb_fini(adev);
7ccfd79f 3026 amdgpu_device_mem_scratch_fini(adev);
533aed27 3027 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
3028 }
3029
a1255107 3030 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 3031 /* XXX handle errors */
2c1a2784 3032 if (r) {
a1255107
AD
3033 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3034 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3035 }
a1255107
AD
3036 adev->ip_blocks[i].status.sw = false;
3037 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
3038 }
3039
a6dcfd9c 3040 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3041 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 3042 continue;
a1255107
AD
3043 if (adev->ip_blocks[i].version->funcs->late_fini)
3044 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3045 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
3046 }
3047
c030f2e4 3048 amdgpu_ras_fini(adev);
3049
d38ceaf9
AD
3050 return 0;
3051}
3052
e3ecdffa 3053/**
beff74bc 3054 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 3055 *
1112a46b 3056 * @work: work_struct.
e3ecdffa 3057 */
beff74bc 3058static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
3059{
3060 struct amdgpu_device *adev =
beff74bc 3061 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
3062 int r;
3063
3064 r = amdgpu_ib_ring_tests(adev);
3065 if (r)
3066 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
3067}
3068
1e317b99
RZ
3069static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3070{
3071 struct amdgpu_device *adev =
3072 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3073
90a92662
MD
3074 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3075 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3076
3077 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3078 adev->gfx.gfx_off_state = true;
1e317b99
RZ
3079}
3080
e3ecdffa 3081/**
e7854a03 3082 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3083 *
3084 * @adev: amdgpu_device pointer
3085 *
3086 * Main suspend function for hardware IPs. The list of all the hardware
3087 * IPs that make up the asic is walked, clockgating is disabled and the
3088 * suspend callbacks are run. suspend puts the hardware and software state
3089 * in each IP into a state suitable for suspend.
3090 * Returns 0 on success, negative error code on failure.
3091 */
e7854a03
AD
3092static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3093{
3094 int i, r;
3095
50ec83f0
AD
3096 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3097 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3098
b31d6ada
EQ
3099 /*
3100 * Per PMFW team's suggestion, driver needs to handle gfxoff
3101 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3102 * scenario. Add the missing df cstate disablement here.
3103 */
3104 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3105 dev_warn(adev->dev, "Failed to disallow df cstate");
3106
e7854a03
AD
3107 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3108 if (!adev->ip_blocks[i].status.valid)
3109 continue;
2b9f7848 3110
e7854a03 3111 /* displays are handled separately */
2b9f7848
ND
3112 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3113 continue;
3114
3115 /* XXX handle errors */
3116 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3117 /* XXX handle errors */
3118 if (r) {
3119 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3120 adev->ip_blocks[i].version->funcs->name, r);
3121 return r;
e7854a03 3122 }
2b9f7848
ND
3123
3124 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3125 }
3126
e7854a03
AD
3127 return 0;
3128}
3129
3130/**
3131 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3132 *
3133 * @adev: amdgpu_device pointer
3134 *
3135 * Main suspend function for hardware IPs. The list of all the hardware
3136 * IPs that make up the asic is walked, clockgating is disabled and the
3137 * suspend callbacks are run. suspend puts the hardware and software state
3138 * in each IP into a state suitable for suspend.
3139 * Returns 0 on success, negative error code on failure.
3140 */
3141static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3142{
3143 int i, r;
3144
557f42a2 3145 if (adev->in_s0ix)
bc143d8b 3146 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3147
d38ceaf9 3148 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3149 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3150 continue;
e7854a03
AD
3151 /* displays are handled in phase1 */
3152 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3153 continue;
bff77e86
LM
3154 /* PSP lost connection when err_event_athub occurs */
3155 if (amdgpu_ras_intr_triggered() &&
3156 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3157 adev->ip_blocks[i].status.hw = false;
3158 continue;
3159 }
e3c1b071 3160
3161 /* skip unnecessary suspend if we do not initialize them yet */
3162 if (adev->gmc.xgmi.pending_reset &&
3163 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3164 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3165 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3166 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3167 adev->ip_blocks[i].status.hw = false;
3168 continue;
3169 }
557f42a2 3170
afa6646b 3171 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3172 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3173 * like at runtime. PSP is also part of the always on hardware
3174 * so no need to suspend it.
3175 */
557f42a2 3176 if (adev->in_s0ix &&
32ff160d 3177 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3178 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3179 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3180 continue;
3181
2a7798ea
AD
3182 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3183 if (adev->in_s0ix &&
4e8303cf
LL
3184 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3185 IP_VERSION(5, 0, 0)) &&
3186 (adev->ip_blocks[i].version->type ==
3187 AMD_IP_BLOCK_TYPE_SDMA))
2a7798ea
AD
3188 continue;
3189
e11c7750
TH
3190 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3191 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3192 * from this location and RLC Autoload automatically also gets loaded
3193 * from here based on PMFW -> PSP message during re-init sequence.
3194 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3195 * the TMR and reload FWs again for IMU enabled APU ASICs.
3196 */
3197 if (amdgpu_in_reset(adev) &&
3198 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3199 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3200 continue;
3201
d38ceaf9 3202 /* XXX handle errors */
a1255107 3203 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3204 /* XXX handle errors */
2c1a2784 3205 if (r) {
a1255107
AD
3206 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3207 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3208 }
876923fb 3209 adev->ip_blocks[i].status.hw = false;
a3a09142 3210 /* handle putting the SMC in the appropriate state */
47fc644f 3211 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3212 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3213 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3214 if (r) {
3215 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3216 adev->mp1_state, r);
3217 return r;
3218 }
a3a09142
AD
3219 }
3220 }
d38ceaf9
AD
3221 }
3222
3223 return 0;
3224}
3225
e7854a03
AD
3226/**
3227 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3228 *
3229 * @adev: amdgpu_device pointer
3230 *
3231 * Main suspend function for hardware IPs. The list of all the hardware
3232 * IPs that make up the asic is walked, clockgating is disabled and the
3233 * suspend callbacks are run. suspend puts the hardware and software state
3234 * in each IP into a state suitable for suspend.
3235 * Returns 0 on success, negative error code on failure.
3236 */
3237int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3238{
3239 int r;
3240
3c73683c
JC
3241 if (amdgpu_sriov_vf(adev)) {
3242 amdgpu_virt_fini_data_exchange(adev);
e7819644 3243 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3244 }
e7819644 3245
e7854a03
AD
3246 r = amdgpu_device_ip_suspend_phase1(adev);
3247 if (r)
3248 return r;
3249 r = amdgpu_device_ip_suspend_phase2(adev);
3250
e7819644
YT
3251 if (amdgpu_sriov_vf(adev))
3252 amdgpu_virt_release_full_gpu(adev, false);
3253
e7854a03
AD
3254 return r;
3255}
3256
06ec9070 3257static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3258{
3259 int i, r;
3260
2cb681b6 3261 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3262 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3263 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3264 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3265 AMD_IP_BLOCK_TYPE_IH,
3266 };
a90ad3c2 3267
95ea3dbc 3268 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3269 int j;
3270 struct amdgpu_ip_block *block;
a90ad3c2 3271
4cd2a96d
J
3272 block = &adev->ip_blocks[i];
3273 block->status.hw = false;
2cb681b6 3274
4cd2a96d 3275 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3276
4cd2a96d 3277 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3278 !block->status.valid)
3279 continue;
3280
3281 r = block->version->funcs->hw_init(adev);
0aaeefcc 3282 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3283 if (r)
3284 return r;
482f0e53 3285 block->status.hw = true;
a90ad3c2
ML
3286 }
3287 }
3288
3289 return 0;
3290}
3291
06ec9070 3292static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3293{
3294 int i, r;
3295
2cb681b6
ML
3296 static enum amd_ip_block_type ip_order[] = {
3297 AMD_IP_BLOCK_TYPE_SMC,
3298 AMD_IP_BLOCK_TYPE_DCE,
3299 AMD_IP_BLOCK_TYPE_GFX,
3300 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3301 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3302 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3303 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3304 AMD_IP_BLOCK_TYPE_VCN,
3305 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3306 };
a90ad3c2 3307
2cb681b6
ML
3308 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3309 int j;
3310 struct amdgpu_ip_block *block;
a90ad3c2 3311
2cb681b6
ML
3312 for (j = 0; j < adev->num_ip_blocks; j++) {
3313 block = &adev->ip_blocks[j];
3314
3315 if (block->version->type != ip_order[i] ||
482f0e53
ML
3316 !block->status.valid ||
3317 block->status.hw)
2cb681b6
ML
3318 continue;
3319
895bd048
JZ
3320 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3321 r = block->version->funcs->resume(adev);
3322 else
3323 r = block->version->funcs->hw_init(adev);
3324
0aaeefcc 3325 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3326 if (r)
3327 return r;
482f0e53 3328 block->status.hw = true;
a90ad3c2
ML
3329 }
3330 }
3331
3332 return 0;
3333}
3334
e3ecdffa
AD
3335/**
3336 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3337 *
3338 * @adev: amdgpu_device pointer
3339 *
3340 * First resume function for hardware IPs. The list of all the hardware
3341 * IPs that make up the asic is walked and the resume callbacks are run for
3342 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3343 * after a suspend and updates the software state as necessary. This
3344 * function is also used for restoring the GPU after a GPU reset.
3345 * Returns 0 on success, negative error code on failure.
3346 */
06ec9070 3347static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3348{
3349 int i, r;
3350
a90ad3c2 3351 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3352 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3353 continue;
a90ad3c2 3354 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3355 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3356 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3357 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3358
fcf0649f
CZ
3359 r = adev->ip_blocks[i].version->funcs->resume(adev);
3360 if (r) {
3361 DRM_ERROR("resume of IP block <%s> failed %d\n",
3362 adev->ip_blocks[i].version->funcs->name, r);
3363 return r;
3364 }
482f0e53 3365 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3366 }
3367 }
3368
3369 return 0;
3370}
3371
e3ecdffa
AD
3372/**
3373 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3374 *
3375 * @adev: amdgpu_device pointer
3376 *
3377 * First resume function for hardware IPs. The list of all the hardware
3378 * IPs that make up the asic is walked and the resume callbacks are run for
3379 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3380 * functional state after a suspend and updates the software state as
3381 * necessary. This function is also used for restoring the GPU after a GPU
3382 * reset.
3383 * Returns 0 on success, negative error code on failure.
3384 */
06ec9070 3385static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3386{
3387 int i, r;
3388
3389 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3390 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3391 continue;
fcf0649f 3392 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3393 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3394 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3395 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3396 continue;
a1255107 3397 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3398 if (r) {
a1255107
AD
3399 DRM_ERROR("resume of IP block <%s> failed %d\n",
3400 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3401 return r;
2c1a2784 3402 }
482f0e53 3403 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3404 }
3405
3406 return 0;
3407}
3408
e3ecdffa
AD
3409/**
3410 * amdgpu_device_ip_resume - run resume for hardware IPs
3411 *
3412 * @adev: amdgpu_device pointer
3413 *
3414 * Main resume function for hardware IPs. The hardware IPs
3415 * are split into two resume functions because they are
b8920e1e 3416 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
3417 * steps need to be take between them. In this case (S3/S4) they are
3418 * run sequentially.
3419 * Returns 0 on success, negative error code on failure.
3420 */
06ec9070 3421static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3422{
3423 int r;
3424
06ec9070 3425 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3426 if (r)
3427 return r;
7a3e0bb2
RZ
3428
3429 r = amdgpu_device_fw_loading(adev);
3430 if (r)
3431 return r;
3432
06ec9070 3433 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3434
3435 return r;
3436}
3437
e3ecdffa
AD
3438/**
3439 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3440 *
3441 * @adev: amdgpu_device pointer
3442 *
3443 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3444 */
4e99a44e 3445static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3446{
6867e1b5
ML
3447 if (amdgpu_sriov_vf(adev)) {
3448 if (adev->is_atom_fw) {
58ff791a 3449 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3450 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3451 } else {
3452 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3453 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3454 }
3455
3456 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3457 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3458 }
048765ad
AR
3459}
3460
e3ecdffa
AD
3461/**
3462 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3463 *
3464 * @asic_type: AMD asic type
3465 *
3466 * Check if there is DC (new modesetting infrastructre) support for an asic.
3467 * returns true if DC has support, false if not.
3468 */
4562236b
HW
3469bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3470{
3471 switch (asic_type) {
0637d417
AD
3472#ifdef CONFIG_DRM_AMDGPU_SI
3473 case CHIP_HAINAN:
3474#endif
3475 case CHIP_TOPAZ:
3476 /* chips with no display hardware */
3477 return false;
4562236b 3478#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3479 case CHIP_TAHITI:
3480 case CHIP_PITCAIRN:
3481 case CHIP_VERDE:
3482 case CHIP_OLAND:
2d32ffd6
AD
3483 /*
3484 * We have systems in the wild with these ASICs that require
3485 * LVDS and VGA support which is not supported with DC.
3486 *
3487 * Fallback to the non-DC driver here by default so as not to
3488 * cause regressions.
3489 */
3490#if defined(CONFIG_DRM_AMD_DC_SI)
3491 return amdgpu_dc > 0;
3492#else
3493 return false;
64200c46 3494#endif
4562236b 3495 case CHIP_BONAIRE:
0d6fbccb 3496 case CHIP_KAVERI:
367e6687
AD
3497 case CHIP_KABINI:
3498 case CHIP_MULLINS:
d9fda248
HW
3499 /*
3500 * We have systems in the wild with these ASICs that require
b5a0168e 3501 * VGA support which is not supported with DC.
d9fda248
HW
3502 *
3503 * Fallback to the non-DC driver here by default so as not to
3504 * cause regressions.
3505 */
3506 return amdgpu_dc > 0;
f7f12b25 3507 default:
fd187853 3508 return amdgpu_dc != 0;
f7f12b25 3509#else
4562236b 3510 default:
93b09a9a 3511 if (amdgpu_dc > 0)
b8920e1e 3512 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3513 return false;
f7f12b25 3514#endif
4562236b
HW
3515 }
3516}
3517
3518/**
3519 * amdgpu_device_has_dc_support - check if dc is supported
3520 *
982a820b 3521 * @adev: amdgpu_device pointer
4562236b
HW
3522 *
3523 * Returns true for supported, false for not supported
3524 */
3525bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3526{
25263da3 3527 if (adev->enable_virtual_display ||
abaf210c 3528 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3529 return false;
3530
4562236b
HW
3531 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3532}
3533
d4535e2c
AG
3534static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3535{
3536 struct amdgpu_device *adev =
3537 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3538 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3539
c6a6e2db
AG
3540 /* It's a bug to not have a hive within this function */
3541 if (WARN_ON(!hive))
3542 return;
3543
3544 /*
3545 * Use task barrier to synchronize all xgmi reset works across the
3546 * hive. task_barrier_enter and task_barrier_exit will block
3547 * until all the threads running the xgmi reset works reach
3548 * those points. task_barrier_full will do both blocks.
3549 */
3550 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3551
3552 task_barrier_enter(&hive->tb);
4a580877 3553 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3554
3555 if (adev->asic_reset_res)
3556 goto fail;
3557
3558 task_barrier_exit(&hive->tb);
4a580877 3559 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3560
3561 if (adev->asic_reset_res)
3562 goto fail;
43c4d576 3563
5e67bba3 3564 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3565 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3566 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3567 } else {
3568
3569 task_barrier_full(&hive->tb);
3570 adev->asic_reset_res = amdgpu_asic_reset(adev);
3571 }
ce316fa5 3572
c6a6e2db 3573fail:
d4535e2c 3574 if (adev->asic_reset_res)
fed184e9 3575 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3576 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3577 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3578}
3579
71f98027
AD
3580static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3581{
3582 char *input = amdgpu_lockup_timeout;
3583 char *timeout_setting = NULL;
3584 int index = 0;
3585 long timeout;
3586 int ret = 0;
3587
3588 /*
67387dfe
AD
3589 * By default timeout for non compute jobs is 10000
3590 * and 60000 for compute jobs.
71f98027 3591 * In SR-IOV or passthrough mode, timeout for compute
b7b2a316 3592 * jobs are 60000 by default.
71f98027
AD
3593 */
3594 adev->gfx_timeout = msecs_to_jiffies(10000);
3595 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3596 if (amdgpu_sriov_vf(adev))
3597 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3598 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3599 else
67387dfe 3600 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3601
f440ff44 3602 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3603 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3604 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3605 ret = kstrtol(timeout_setting, 0, &timeout);
3606 if (ret)
3607 return ret;
3608
3609 if (timeout == 0) {
3610 index++;
3611 continue;
3612 } else if (timeout < 0) {
3613 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3614 dev_warn(adev->dev, "lockup timeout disabled");
3615 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3616 } else {
3617 timeout = msecs_to_jiffies(timeout);
3618 }
3619
3620 switch (index++) {
3621 case 0:
3622 adev->gfx_timeout = timeout;
3623 break;
3624 case 1:
3625 adev->compute_timeout = timeout;
3626 break;
3627 case 2:
3628 adev->sdma_timeout = timeout;
3629 break;
3630 case 3:
3631 adev->video_timeout = timeout;
3632 break;
3633 default:
3634 break;
3635 }
3636 }
3637 /*
3638 * There is only one value specified and
3639 * it should apply to all non-compute jobs.
3640 */
bcccee89 3641 if (index == 1) {
71f98027 3642 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3643 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3644 adev->compute_timeout = adev->gfx_timeout;
3645 }
71f98027
AD
3646 }
3647
3648 return ret;
3649}
d4535e2c 3650
4a74c38c
PY
3651/**
3652 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3653 *
3654 * @adev: amdgpu_device pointer
3655 *
3656 * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3657 */
3658static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3659{
3660 struct iommu_domain *domain;
3661
3662 domain = iommu_get_domain_for_dev(adev->dev);
3663 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3664 adev->ram_is_direct_mapped = true;
3665}
3666
77f3a5cd 3667static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3668 &dev_attr_pcie_replay_count.attr,
3669 NULL
3670};
3671
02ff519e
AD
3672static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3673{
3674 if (amdgpu_mcbp == 1)
3675 adev->gfx.mcbp = true;
1e9e15dc
JZ
3676 else if (amdgpu_mcbp == 0)
3677 adev->gfx.mcbp = false;
4e8303cf
LL
3678 else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
3679 (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
1e9e15dc 3680 adev->gfx.num_gfx_rings)
50a7c876
AD
3681 adev->gfx.mcbp = true;
3682
02ff519e
AD
3683 if (amdgpu_sriov_vf(adev))
3684 adev->gfx.mcbp = true;
3685
3686 if (adev->gfx.mcbp)
3687 DRM_INFO("MCBP is enabled\n");
3688}
3689
d38ceaf9
AD
3690/**
3691 * amdgpu_device_init - initialize the driver
3692 *
3693 * @adev: amdgpu_device pointer
d38ceaf9
AD
3694 * @flags: driver flags
3695 *
3696 * Initializes the driver info and hw (all asics).
3697 * Returns 0 for success or an error on failure.
3698 * Called at driver startup.
3699 */
3700int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3701 uint32_t flags)
3702{
8aba21b7
LT
3703 struct drm_device *ddev = adev_to_drm(adev);
3704 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3705 int r, i;
b98c6299 3706 bool px = false;
95844d20 3707 u32 max_MBps;
59e9fff1 3708 int tmp;
d38ceaf9
AD
3709
3710 adev->shutdown = false;
d38ceaf9 3711 adev->flags = flags;
4e66d7d2
YZ
3712
3713 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3714 adev->asic_type = amdgpu_force_asic_type;
3715 else
3716 adev->asic_type = flags & AMD_ASIC_MASK;
3717
d38ceaf9 3718 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3719 if (amdgpu_emu_mode == 1)
8bdab6bb 3720 adev->usec_timeout *= 10;
770d13b1 3721 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3722 adev->accel_working = false;
3723 adev->num_rings = 0;
68ce8b24 3724 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3725 adev->mman.buffer_funcs = NULL;
3726 adev->mman.buffer_funcs_ring = NULL;
3727 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3728 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3729 adev->gmc.gmc_funcs = NULL;
7bd939d0 3730 adev->harvest_ip_mask = 0x0;
f54d1867 3731 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3732 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3733
3734 adev->smc_rreg = &amdgpu_invalid_rreg;
3735 adev->smc_wreg = &amdgpu_invalid_wreg;
3736 adev->pcie_rreg = &amdgpu_invalid_rreg;
3737 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3738 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3739 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3740 adev->pciep_rreg = &amdgpu_invalid_rreg;
3741 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3742 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3743 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
a76b2870
CL
3744 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3745 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
d38ceaf9
AD
3746 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3747 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3748 adev->didt_rreg = &amdgpu_invalid_rreg;
3749 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3750 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3751 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3752 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3753 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3754
3e39ab90
AD
3755 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3756 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3757 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3758
3759 /* mutex initialization are all done here so we
b8920e1e
SS
3760 * can recall function without having locking issues
3761 */
0e5ca0d1 3762 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3763 mutex_init(&adev->pm.mutex);
3764 mutex_init(&adev->gfx.gpu_clock_mutex);
3765 mutex_init(&adev->srbm_mutex);
b8866c26 3766 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3767 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3768 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3769 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3770 mutex_init(&adev->mn_lock);
e23b74aa 3771 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3772 hash_init(adev->mn_hash);
32eaeae0 3773 mutex_init(&adev->psp.mutex);
bd052211 3774 mutex_init(&adev->notifier_lock);
8cda7a4f 3775 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3776 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3777
ab3b9de6 3778 amdgpu_device_init_apu_flags(adev);
9f6a7857 3779
912dfc84
EQ
3780 r = amdgpu_device_check_arguments(adev);
3781 if (r)
3782 return r;
d38ceaf9 3783
d38ceaf9
AD
3784 spin_lock_init(&adev->mmio_idx_lock);
3785 spin_lock_init(&adev->smc_idx_lock);
3786 spin_lock_init(&adev->pcie_idx_lock);
3787 spin_lock_init(&adev->uvd_ctx_idx_lock);
3788 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3789 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3790 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3791 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3792 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3793
0c4e7fa5
CZ
3794 INIT_LIST_HEAD(&adev->shadow_list);
3795 mutex_init(&adev->shadow_list_lock);
3796
655ce9cb 3797 INIT_LIST_HEAD(&adev->reset_list);
3798
6492e1b0 3799 INIT_LIST_HEAD(&adev->ras_list);
3800
3e38b634
EQ
3801 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3802
beff74bc
AD
3803 INIT_DELAYED_WORK(&adev->delayed_init_work,
3804 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3805 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3806 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3807
d4535e2c
AG
3808 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3809
d23ee13f 3810 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3811 adev->gfx.gfx_off_residency = 0;
3812 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3813 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3814
b265bdbd
EQ
3815 atomic_set(&adev->throttling_logging_enabled, 1);
3816 /*
3817 * If throttling continues, logging will be performed every minute
3818 * to avoid log flooding. "-1" is subtracted since the thermal
3819 * throttling interrupt comes every second. Thus, the total logging
3820 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3821 * for throttling interrupt) = 60 seconds.
3822 */
3823 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3824 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3825
0fa49558
AX
3826 /* Registers mapping */
3827 /* TODO: block userspace mapping of io register */
da69c161
KW
3828 if (adev->asic_type >= CHIP_BONAIRE) {
3829 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3830 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3831 } else {
3832 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3833 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3834 }
d38ceaf9 3835
6c08e0ef
EQ
3836 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3837 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3838
d38ceaf9 3839 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 3840 if (!adev->rmmio)
d38ceaf9 3841 return -ENOMEM;
b8920e1e 3842
d38ceaf9 3843 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 3844 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 3845
436afdfa
PY
3846 /*
3847 * Reset domain needs to be present early, before XGMI hive discovered
3848 * (if any) and intitialized to use reset sem and in_gpu reset flag
3849 * early on during init and before calling to RREG32.
3850 */
3851 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3852 if (!adev->reset_domain)
3853 return -ENOMEM;
3854
3aa0115d
ML
3855 /* detect hw virtualization here */
3856 amdgpu_detect_virtualization(adev);
3857
04e85958
TL
3858 amdgpu_device_get_pcie_info(adev);
3859
dffa11b4
ML
3860 r = amdgpu_device_get_job_timeout_settings(adev);
3861 if (r) {
3862 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3863 return r;
a190d1c7
XY
3864 }
3865
d38ceaf9 3866 /* early init functions */
06ec9070 3867 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3868 if (r)
4ef87d8f 3869 return r;
d38ceaf9 3870
02ff519e
AD
3871 amdgpu_device_set_mcbp(adev);
3872
b7cdb41e
ML
3873 /* Get rid of things like offb */
3874 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3875 if (r)
3876 return r;
3877
4d33e704
SK
3878 /* Enable TMZ based on IP_VERSION */
3879 amdgpu_gmc_tmz_set(adev);
3880
957b0787 3881 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3882 /* Need to get xgmi info early to decide the reset behavior*/
3883 if (adev->gmc.xgmi.supported) {
3884 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3885 if (r)
3886 return r;
3887 }
3888
8e6d0b69 3889 /* enable PCIE atomic ops */
b4520bfd
GW
3890 if (amdgpu_sriov_vf(adev)) {
3891 if (adev->virt.fw_reserve.p_pf2vf)
3892 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3893 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3894 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
3895 /* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
3896 * internal path natively support atomics, set have_atomics_support to true.
3897 */
b4520bfd 3898 } else if ((adev->flags & AMD_IS_APU) &&
4e8303cf
LL
3899 (amdgpu_ip_version(adev, GC_HWIP, 0) >
3900 IP_VERSION(9, 0, 0))) {
0e768043 3901 adev->have_atomics_support = true;
b4520bfd 3902 } else {
8e6d0b69 3903 adev->have_atomics_support =
3904 !pci_enable_atomic_ops_to_root(adev->pdev,
3905 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3906 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
3907 }
3908
8e6d0b69 3909 if (!adev->have_atomics_support)
3910 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3911
6585661d 3912 /* doorbell bar mapping and doorbell index init*/
43c064db 3913 amdgpu_doorbell_init(adev);
6585661d 3914
9475a943
SL
3915 if (amdgpu_emu_mode == 1) {
3916 /* post the asic on emulation mode */
3917 emu_soc_asic_init(adev);
bfca0289 3918 goto fence_driver_init;
9475a943 3919 }
bfca0289 3920
04442bf7
LL
3921 amdgpu_reset_init(adev);
3922
4e99a44e 3923 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
3924 if (adev->bios)
3925 amdgpu_device_detect_sriov_bios(adev);
048765ad 3926
95e8e59e
AD
3927 /* check if we need to reset the asic
3928 * E.g., driver was not cleanly unloaded previously, etc.
3929 */
f14899fd 3930 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3931 if (adev->gmc.xgmi.num_physical_nodes) {
3932 dev_info(adev->dev, "Pending hive reset.\n");
3933 adev->gmc.xgmi.pending_reset = true;
3934 /* Only need to init necessary block for SMU to handle the reset */
3935 for (i = 0; i < adev->num_ip_blocks; i++) {
3936 if (!adev->ip_blocks[i].status.valid)
3937 continue;
3938 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3939 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3940 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3941 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3942 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3943 adev->ip_blocks[i].version->funcs->name);
3944 adev->ip_blocks[i].status.hw = true;
3945 }
3946 }
3947 } else {
59e9fff1 3948 tmp = amdgpu_reset_method;
3949 /* It should do a default reset when loading or reloading the driver,
3950 * regardless of the module parameter reset_method.
3951 */
3952 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3953 r = amdgpu_asic_reset(adev);
59e9fff1 3954 amdgpu_reset_method = tmp;
e3c1b071 3955 if (r) {
3956 dev_err(adev->dev, "asic reset on init failed\n");
3957 goto failed;
3958 }
95e8e59e
AD
3959 }
3960 }
3961
d38ceaf9 3962 /* Post card if necessary */
39c640c0 3963 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3964 if (!adev->bios) {
bec86378 3965 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3966 r = -EINVAL;
3967 goto failed;
d38ceaf9 3968 }
bec86378 3969 DRM_INFO("GPU posting now...\n");
4d2997ab 3970 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3971 if (r) {
3972 dev_err(adev->dev, "gpu post error!\n");
3973 goto failed;
3974 }
d38ceaf9
AD
3975 }
3976
9535a86a
SZ
3977 if (adev->bios) {
3978 if (adev->is_atom_fw) {
3979 /* Initialize clocks */
3980 r = amdgpu_atomfirmware_get_clock_info(adev);
3981 if (r) {
3982 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3983 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3984 goto failed;
3985 }
3986 } else {
3987 /* Initialize clocks */
3988 r = amdgpu_atombios_get_clock_info(adev);
3989 if (r) {
3990 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3991 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3992 goto failed;
3993 }
3994 /* init i2c buses */
3995 if (!amdgpu_device_has_dc_support(adev))
3996 amdgpu_atombios_i2c_init(adev);
a5bde2f9 3997 }
2c1a2784 3998 }
d38ceaf9 3999
bfca0289 4000fence_driver_init:
d38ceaf9 4001 /* Fence driver */
067f44c8 4002 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 4003 if (r) {
067f44c8 4004 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 4005 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 4006 goto failed;
2c1a2784 4007 }
d38ceaf9
AD
4008
4009 /* init the mode config */
4a580877 4010 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 4011
06ec9070 4012 r = amdgpu_device_ip_init(adev);
d38ceaf9 4013 if (r) {
06ec9070 4014 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 4015 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 4016 goto release_ras_con;
d38ceaf9
AD
4017 }
4018
8d35a259
LG
4019 amdgpu_fence_driver_hw_init(adev);
4020
d69b8971
YZ
4021 dev_info(adev->dev,
4022 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
4023 adev->gfx.config.max_shader_engines,
4024 adev->gfx.config.max_sh_per_se,
4025 adev->gfx.config.max_cu_per_sh,
4026 adev->gfx.cu_info.number);
4027
d38ceaf9
AD
4028 adev->accel_working = true;
4029
e59c0205
AX
4030 amdgpu_vm_check_compute_bug(adev);
4031
95844d20
MO
4032 /* Initialize the buffer migration limit. */
4033 if (amdgpu_moverate >= 0)
4034 max_MBps = amdgpu_moverate;
4035 else
4036 max_MBps = 8; /* Allow 8 MB/s. */
4037 /* Get a log2 for easy divisions. */
4038 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4039
b0adca4d
EQ
4040 /*
4041 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4042 * Otherwise the mgpu fan boost feature will be skipped due to the
4043 * gpu instance is counted less.
4044 */
4045 amdgpu_register_gpu_instance(adev);
4046
d38ceaf9
AD
4047 /* enable clockgating, etc. after ib tests, etc. since some blocks require
4048 * explicit gating rather than handling it automatically.
4049 */
e3c1b071 4050 if (!adev->gmc.xgmi.pending_reset) {
4051 r = amdgpu_device_ip_late_init(adev);
4052 if (r) {
4053 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4054 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 4055 goto release_ras_con;
e3c1b071 4056 }
4057 /* must succeed. */
4058 amdgpu_ras_resume(adev);
4059 queue_delayed_work(system_wq, &adev->delayed_init_work,
4060 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 4061 }
d38ceaf9 4062
38eecbe0
CL
4063 if (amdgpu_sriov_vf(adev)) {
4064 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4065 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4066 }
2c738637 4067
90bcb9b5
EQ
4068 /*
4069 * Place those sysfs registering after `late_init`. As some of those
4070 * operations performed in `late_init` might affect the sysfs
4071 * interfaces creating.
4072 */
4073 r = amdgpu_atombios_sysfs_init(adev);
4074 if (r)
4075 drm_err(&adev->ddev,
4076 "registering atombios sysfs failed (%d).\n", r);
4077
4078 r = amdgpu_pm_sysfs_init(adev);
4079 if (r)
4080 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4081
4082 r = amdgpu_ucode_sysfs_init(adev);
4083 if (r) {
4084 adev->ucode_sysfs_en = false;
4085 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4086 } else
4087 adev->ucode_sysfs_en = true;
4088
77f3a5cd 4089 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4090 if (r)
77f3a5cd 4091 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4092
76da73f0
LL
4093 r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4094 if (r)
4095 dev_err(adev->dev,
4096 "Could not create amdgpu board attributes\n");
4097
7957ec80
LL
4098 amdgpu_fru_sysfs_init(adev);
4099
d155bef0
AB
4100 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4101 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4102 if (r)
4103 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4104
c1dd4aa6
AG
4105 /* Have stored pci confspace at hand for restore in sudden PCI error */
4106 if (amdgpu_device_cache_pci_state(adev->pdev))
4107 pci_restore_state(pdev);
4108
8c3dd61c
KHF
4109 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4110 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
4111 * ignore it
4112 */
8c3dd61c 4113 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4114 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4115
d37a3929
OC
4116 px = amdgpu_device_supports_px(ddev);
4117
4118 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4119 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4120 vga_switcheroo_register_client(adev->pdev,
4121 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4122
4123 if (px)
8c3dd61c 4124 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4125
e3c1b071 4126 if (adev->gmc.xgmi.pending_reset)
4127 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4128 msecs_to_jiffies(AMDGPU_RESUME_MS));
4129
4a74c38c
PY
4130 amdgpu_device_check_iommu_direct_map(adev);
4131
d38ceaf9 4132 return 0;
83ba126a 4133
970fd197 4134release_ras_con:
38eecbe0
CL
4135 if (amdgpu_sriov_vf(adev))
4136 amdgpu_virt_release_full_gpu(adev, true);
4137
4138 /* failed in exclusive mode due to timeout */
4139 if (amdgpu_sriov_vf(adev) &&
4140 !amdgpu_sriov_runtime(adev) &&
4141 amdgpu_virt_mmio_blocked(adev) &&
4142 !amdgpu_virt_wait_reset(adev)) {
4143 dev_err(adev->dev, "VF exclusive mode timeout\n");
4144 /* Don't send request since VF is inactive. */
4145 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4146 adev->virt.ops = NULL;
4147 r = -EAGAIN;
4148 }
970fd197
SY
4149 amdgpu_release_ras_context(adev);
4150
83ba126a 4151failed:
89041940 4152 amdgpu_vf_error_trans_all(adev);
8840a387 4153
83ba126a 4154 return r;
d38ceaf9
AD
4155}
4156
07775fc1
AG
4157static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4158{
62d5f9f7 4159
07775fc1
AG
4160 /* Clear all CPU mappings pointing to this device */
4161 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4162
4163 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 4164 amdgpu_doorbell_fini(adev);
07775fc1
AG
4165
4166 iounmap(adev->rmmio);
4167 adev->rmmio = NULL;
4168 if (adev->mman.aper_base_kaddr)
4169 iounmap(adev->mman.aper_base_kaddr);
4170 adev->mman.aper_base_kaddr = NULL;
4171
4172 /* Memory manager related */
a0ba1279 4173 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4174 arch_phys_wc_del(adev->gmc.vram_mtrr);
4175 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4176 }
4177}
4178
d38ceaf9 4179/**
bbe04dec 4180 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4181 *
4182 * @adev: amdgpu_device pointer
4183 *
4184 * Tear down the driver info (all asics).
4185 * Called at driver shutdown.
4186 */
72c8c97b 4187void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4188{
aac89168 4189 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4190 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4191 adev->shutdown = true;
9f875167 4192
752c683d
ML
4193 /* make sure IB test finished before entering exclusive mode
4194 * to avoid preemption on IB test
b8920e1e 4195 */
519b8b76 4196 if (amdgpu_sriov_vf(adev)) {
752c683d 4197 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4198 amdgpu_virt_fini_data_exchange(adev);
4199 }
752c683d 4200
e5b03032
ML
4201 /* disable all interrupts */
4202 amdgpu_irq_disable_all(adev);
47fc644f 4203 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4204 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4205 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4206 else
4a580877 4207 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4208 }
8d35a259 4209 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4210
cd3a8a59 4211 if (adev->mman.initialized)
9bff18d1 4212 drain_workqueue(adev->mman.bdev.wq);
98f56188 4213
53e9d836 4214 if (adev->pm.sysfs_initialized)
7c868b59 4215 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4216 if (adev->ucode_sysfs_en)
4217 amdgpu_ucode_sysfs_fini(adev);
4218 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4219 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4220
232d1d43
SY
4221 /* disable ras feature must before hw fini */
4222 amdgpu_ras_pre_fini(adev);
4223
e9669fb7 4224 amdgpu_device_ip_fini_early(adev);
d10d0daa 4225
a3848df6
YW
4226 amdgpu_irq_fini_hw(adev);
4227
b6fd6e0f
SK
4228 if (adev->mman.initialized)
4229 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4230
d10d0daa 4231 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4232
39934d3e
VP
4233 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4234 amdgpu_device_unmap_mmio(adev);
87172e89 4235
72c8c97b
AG
4236}
4237
4238void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4239{
62d5f9f7 4240 int idx;
d37a3929 4241 bool px;
62d5f9f7 4242
8d35a259 4243 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4244 amdgpu_device_ip_fini(adev);
b31d3063 4245 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4246 adev->accel_working = false;
68ce8b24 4247 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4248
4249 amdgpu_reset_fini(adev);
4250
d38ceaf9 4251 /* free i2c buses */
4562236b
HW
4252 if (!amdgpu_device_has_dc_support(adev))
4253 amdgpu_i2c_fini(adev);
bfca0289
SL
4254
4255 if (amdgpu_emu_mode != 1)
4256 amdgpu_atombios_fini(adev);
4257
d38ceaf9
AD
4258 kfree(adev->bios);
4259 adev->bios = NULL;
d37a3929
OC
4260
4261 px = amdgpu_device_supports_px(adev_to_drm(adev));
4262
4263 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4264 apple_gmux_detect(NULL, NULL)))
84c8b22e 4265 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4266
4267 if (px)
83ba126a 4268 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4269
38d6be81 4270 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4271 vga_client_unregister(adev->pdev);
e9bc1bf7 4272
62d5f9f7
LS
4273 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4274
4275 iounmap(adev->rmmio);
4276 adev->rmmio = NULL;
43c064db 4277 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4278 drm_dev_exit(idx);
4279 }
4280
d155bef0
AB
4281 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4282 amdgpu_pmu_fini(adev);
72de33f8 4283 if (adev->mman.discovery_bin)
a190d1c7 4284 amdgpu_discovery_fini(adev);
72c8c97b 4285
cfbb6b00
AG
4286 amdgpu_reset_put_reset_domain(adev->reset_domain);
4287 adev->reset_domain = NULL;
4288
72c8c97b
AG
4289 kfree(adev->pci_state);
4290
d38ceaf9
AD
4291}
4292
58144d28
ND
4293/**
4294 * amdgpu_device_evict_resources - evict device resources
4295 * @adev: amdgpu device object
4296 *
4297 * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4298 * of the vram memory type. Mainly used for evicting device resources
4299 * at suspend time.
4300 *
4301 */
7863c155 4302static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4303{
7863c155
ML
4304 int ret;
4305
e53d9665
ML
4306 /* No need to evict vram on APUs for suspend to ram or s2idle */
4307 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4308 return 0;
58144d28 4309
7863c155
ML
4310 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4311 if (ret)
58144d28 4312 DRM_WARN("evicting device resources failed\n");
7863c155 4313 return ret;
58144d28 4314}
d38ceaf9
AD
4315
4316/*
4317 * Suspend & resume.
4318 */
4319/**
810ddc3a 4320 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4321 *
87e3f136 4322 * @dev: drm dev pointer
87e3f136 4323 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4324 *
4325 * Puts the hw in the suspend state (all asics).
4326 * Returns 0 for success or an error on failure.
4327 * Called at driver suspend.
4328 */
de185019 4329int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4330{
a2e15b0e 4331 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4332 int r = 0;
d38ceaf9 4333
d38ceaf9
AD
4334 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4335 return 0;
4336
44779b43 4337 adev->in_suspend = true;
3fa8f89d 4338
47ea2076
SF
4339 /* Evict the majority of BOs before grabbing the full access */
4340 r = amdgpu_device_evict_resources(adev);
4341 if (r)
4342 return r;
4343
d7274ec7
BZ
4344 if (amdgpu_sriov_vf(adev)) {
4345 amdgpu_virt_fini_data_exchange(adev);
4346 r = amdgpu_virt_request_full_gpu(adev, false);
4347 if (r)
4348 return r;
4349 }
4350
3fa8f89d
S
4351 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4352 DRM_WARN("smart shift update failed\n");
4353
5f818173 4354 if (fbcon)
087451f3 4355 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4356
beff74bc 4357 cancel_delayed_work_sync(&adev->delayed_init_work);
0dee7263 4358 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
a5459475 4359
5e6932fe 4360 amdgpu_ras_suspend(adev);
4361
2196927b 4362 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4363
c004d44e 4364 if (!adev->in_s0ix)
5d3a2d95 4365 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4366
7863c155
ML
4367 r = amdgpu_device_evict_resources(adev);
4368 if (r)
4369 return r;
d38ceaf9 4370
8d35a259 4371 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4372
2196927b 4373 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4374
d7274ec7
BZ
4375 if (amdgpu_sriov_vf(adev))
4376 amdgpu_virt_release_full_gpu(adev, false);
4377
d38ceaf9
AD
4378 return 0;
4379}
4380
4381/**
810ddc3a 4382 * amdgpu_device_resume - initiate device resume
d38ceaf9 4383 *
87e3f136 4384 * @dev: drm dev pointer
87e3f136 4385 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4386 *
4387 * Bring the hw back to operating state (all asics).
4388 * Returns 0 for success or an error on failure.
4389 * Called at driver resume.
4390 */
de185019 4391int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4392{
1348969a 4393 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4394 int r = 0;
d38ceaf9 4395
d7274ec7
BZ
4396 if (amdgpu_sriov_vf(adev)) {
4397 r = amdgpu_virt_request_full_gpu(adev, true);
4398 if (r)
4399 return r;
4400 }
4401
d38ceaf9
AD
4402 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4403 return 0;
4404
62498733 4405 if (adev->in_s0ix)
bc143d8b 4406 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4407
d38ceaf9 4408 /* post card */
39c640c0 4409 if (amdgpu_device_need_post(adev)) {
4d2997ab 4410 r = amdgpu_device_asic_init(adev);
74b0b157 4411 if (r)
aac89168 4412 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4413 }
d38ceaf9 4414
06ec9070 4415 r = amdgpu_device_ip_resume(adev);
d7274ec7 4416
e6707218 4417 if (r) {
aac89168 4418 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4419 goto exit;
e6707218 4420 }
8d35a259 4421 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4422
06ec9070 4423 r = amdgpu_device_ip_late_init(adev);
03161a6e 4424 if (r)
3c22c1ea 4425 goto exit;
d38ceaf9 4426
beff74bc
AD
4427 queue_delayed_work(system_wq, &adev->delayed_init_work,
4428 msecs_to_jiffies(AMDGPU_RESUME_MS));
4429
c004d44e 4430 if (!adev->in_s0ix) {
5d3a2d95
AD
4431 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4432 if (r)
3c22c1ea 4433 goto exit;
5d3a2d95 4434 }
756e6880 4435
3c22c1ea
SF
4436exit:
4437 if (amdgpu_sriov_vf(adev)) {
4438 amdgpu_virt_init_data_exchange(adev);
4439 amdgpu_virt_release_full_gpu(adev, true);
4440 }
4441
4442 if (r)
4443 return r;
4444
96a5d8d4 4445 /* Make sure IB tests flushed */
beff74bc 4446 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4447
a2e15b0e 4448 if (fbcon)
087451f3 4449 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4450
5e6932fe 4451 amdgpu_ras_resume(adev);
4452
d09ef243
AD
4453 if (adev->mode_info.num_crtc) {
4454 /*
4455 * Most of the connector probing functions try to acquire runtime pm
4456 * refs to ensure that the GPU is powered on when connector polling is
4457 * performed. Since we're calling this from a runtime PM callback,
4458 * trying to acquire rpm refs will cause us to deadlock.
4459 *
4460 * Since we're guaranteed to be holding the rpm lock, it's safe to
4461 * temporarily disable the rpm helpers so this doesn't deadlock us.
4462 */
23a1a9e5 4463#ifdef CONFIG_PM
d09ef243 4464 dev->dev->power.disable_depth++;
23a1a9e5 4465#endif
d09ef243
AD
4466 if (!adev->dc_enabled)
4467 drm_helper_hpd_irq_event(dev);
4468 else
4469 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4470#ifdef CONFIG_PM
d09ef243 4471 dev->dev->power.disable_depth--;
23a1a9e5 4472#endif
d09ef243 4473 }
44779b43
RZ
4474 adev->in_suspend = false;
4475
dc907c9d
JX
4476 if (adev->enable_mes)
4477 amdgpu_mes_self_test(adev);
4478
3fa8f89d
S
4479 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4480 DRM_WARN("smart shift update failed\n");
4481
4d3b9ae5 4482 return 0;
d38ceaf9
AD
4483}
4484
e3ecdffa
AD
4485/**
4486 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4487 *
4488 * @adev: amdgpu_device pointer
4489 *
4490 * The list of all the hardware IPs that make up the asic is walked and
4491 * the check_soft_reset callbacks are run. check_soft_reset determines
4492 * if the asic is still hung or not.
4493 * Returns true if any of the IPs are still in a hung state, false if not.
4494 */
06ec9070 4495static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4496{
4497 int i;
4498 bool asic_hang = false;
4499
f993d628
ML
4500 if (amdgpu_sriov_vf(adev))
4501 return true;
4502
8bc04c29
AD
4503 if (amdgpu_asic_need_full_reset(adev))
4504 return true;
4505
63fbf42f 4506 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4507 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4508 continue;
a1255107
AD
4509 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4510 adev->ip_blocks[i].status.hang =
4511 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4512 if (adev->ip_blocks[i].status.hang) {
aac89168 4513 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4514 asic_hang = true;
4515 }
4516 }
4517 return asic_hang;
4518}
4519
e3ecdffa
AD
4520/**
4521 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4522 *
4523 * @adev: amdgpu_device pointer
4524 *
4525 * The list of all the hardware IPs that make up the asic is walked and the
4526 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4527 * handles any IP specific hardware or software state changes that are
4528 * necessary for a soft reset to succeed.
4529 * Returns 0 on success, negative error code on failure.
4530 */
06ec9070 4531static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4532{
4533 int i, r = 0;
4534
4535 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4536 if (!adev->ip_blocks[i].status.valid)
d31a501e 4537 continue;
a1255107
AD
4538 if (adev->ip_blocks[i].status.hang &&
4539 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4540 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4541 if (r)
4542 return r;
4543 }
4544 }
4545
4546 return 0;
4547}
4548
e3ecdffa
AD
4549/**
4550 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4551 *
4552 * @adev: amdgpu_device pointer
4553 *
4554 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4555 * reset is necessary to recover.
4556 * Returns true if a full asic reset is required, false if not.
4557 */
06ec9070 4558static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4559{
da146d3b
AD
4560 int i;
4561
8bc04c29
AD
4562 if (amdgpu_asic_need_full_reset(adev))
4563 return true;
4564
da146d3b 4565 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4566 if (!adev->ip_blocks[i].status.valid)
da146d3b 4567 continue;
a1255107
AD
4568 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4569 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4570 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4571 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4572 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4573 if (adev->ip_blocks[i].status.hang) {
aac89168 4574 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4575 return true;
4576 }
4577 }
35d782fe
CZ
4578 }
4579 return false;
4580}
4581
e3ecdffa
AD
4582/**
4583 * amdgpu_device_ip_soft_reset - do a soft reset
4584 *
4585 * @adev: amdgpu_device pointer
4586 *
4587 * The list of all the hardware IPs that make up the asic is walked and the
4588 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4589 * IP specific hardware or software state changes that are necessary to soft
4590 * reset the IP.
4591 * Returns 0 on success, negative error code on failure.
4592 */
06ec9070 4593static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4594{
4595 int i, r = 0;
4596
4597 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4598 if (!adev->ip_blocks[i].status.valid)
35d782fe 4599 continue;
a1255107
AD
4600 if (adev->ip_blocks[i].status.hang &&
4601 adev->ip_blocks[i].version->funcs->soft_reset) {
4602 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4603 if (r)
4604 return r;
4605 }
4606 }
4607
4608 return 0;
4609}
4610
e3ecdffa
AD
4611/**
4612 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4613 *
4614 * @adev: amdgpu_device pointer
4615 *
4616 * The list of all the hardware IPs that make up the asic is walked and the
4617 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4618 * handles any IP specific hardware or software state changes that are
4619 * necessary after the IP has been soft reset.
4620 * Returns 0 on success, negative error code on failure.
4621 */
06ec9070 4622static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4623{
4624 int i, r = 0;
4625
4626 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4627 if (!adev->ip_blocks[i].status.valid)
35d782fe 4628 continue;
a1255107
AD
4629 if (adev->ip_blocks[i].status.hang &&
4630 adev->ip_blocks[i].version->funcs->post_soft_reset)
4631 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4632 if (r)
4633 return r;
4634 }
4635
4636 return 0;
4637}
4638
e3ecdffa 4639/**
c33adbc7 4640 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4641 *
4642 * @adev: amdgpu_device pointer
4643 *
4644 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4645 * restore things like GPUVM page tables after a GPU reset where
4646 * the contents of VRAM might be lost.
403009bf
CK
4647 *
4648 * Returns:
4649 * 0 on success, negative error code on failure.
e3ecdffa 4650 */
c33adbc7 4651static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4652{
c41d1cf6 4653 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4654 struct amdgpu_bo *shadow;
e18aaea7 4655 struct amdgpu_bo_vm *vmbo;
403009bf 4656 long r = 1, tmo;
c41d1cf6
ML
4657
4658 if (amdgpu_sriov_runtime(adev))
b045d3af 4659 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4660 else
4661 tmo = msecs_to_jiffies(100);
4662
aac89168 4663 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4664 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4665 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4666 /* If vm is compute context or adev is APU, shadow will be NULL */
4667 if (!vmbo->shadow)
4668 continue;
4669 shadow = vmbo->shadow;
4670
403009bf 4671 /* No need to recover an evicted BO */
d3116756
CK
4672 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4673 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4674 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4675 continue;
4676
4677 r = amdgpu_bo_restore_shadow(shadow, &next);
4678 if (r)
4679 break;
4680
c41d1cf6 4681 if (fence) {
1712fb1a 4682 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4683 dma_fence_put(fence);
4684 fence = next;
1712fb1a 4685 if (tmo == 0) {
4686 r = -ETIMEDOUT;
c41d1cf6 4687 break;
1712fb1a 4688 } else if (tmo < 0) {
4689 r = tmo;
4690 break;
4691 }
403009bf
CK
4692 } else {
4693 fence = next;
c41d1cf6 4694 }
c41d1cf6
ML
4695 }
4696 mutex_unlock(&adev->shadow_list_lock);
4697
403009bf
CK
4698 if (fence)
4699 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4700 dma_fence_put(fence);
4701
1712fb1a 4702 if (r < 0 || tmo <= 0) {
aac89168 4703 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4704 return -EIO;
4705 }
c41d1cf6 4706
aac89168 4707 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4708 return 0;
c41d1cf6
ML
4709}
4710
a90ad3c2 4711
e3ecdffa 4712/**
06ec9070 4713 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4714 *
982a820b 4715 * @adev: amdgpu_device pointer
87e3f136 4716 * @from_hypervisor: request from hypervisor
5740682e
ML
4717 *
4718 * do VF FLR and reinitialize Asic
3f48c681 4719 * return 0 means succeeded otherwise failed
e3ecdffa
AD
4720 */
4721static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4722 bool from_hypervisor)
5740682e
ML
4723{
4724 int r;
a5f67c93 4725 struct amdgpu_hive_info *hive = NULL;
7258fa31 4726 int retry_limit = 0;
5740682e 4727
7258fa31 4728retry:
c004d44e 4729 amdgpu_amdkfd_pre_reset(adev);
428890a3 4730
5740682e
ML
4731 if (from_hypervisor)
4732 r = amdgpu_virt_request_full_gpu(adev, true);
4733 else
4734 r = amdgpu_virt_reset_gpu(adev);
4735 if (r)
4736 return r;
f734b213 4737 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 4738
83f24a8f
HC
4739 /* some sw clean up VF needs to do before recover */
4740 amdgpu_virt_post_reset(adev);
4741
a90ad3c2 4742 /* Resume IP prior to SMC */
06ec9070 4743 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4744 if (r)
4745 goto error;
a90ad3c2 4746
c9ffa427 4747 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4748
7a3e0bb2
RZ
4749 r = amdgpu_device_fw_loading(adev);
4750 if (r)
4751 return r;
4752
a90ad3c2 4753 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4754 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4755 if (r)
4756 goto error;
a90ad3c2 4757
a5f67c93
ZL
4758 hive = amdgpu_get_xgmi_hive(adev);
4759 /* Update PSP FW topology after reset */
4760 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4761 r = amdgpu_xgmi_update_topology(hive, adev);
4762
4763 if (hive)
4764 amdgpu_put_xgmi_hive(hive);
4765
4766 if (!r) {
a5f67c93 4767 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4768
c004d44e 4769 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4770 }
a90ad3c2 4771
abc34253 4772error:
c41d1cf6 4773 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4774 amdgpu_inc_vram_lost(adev);
c33adbc7 4775 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4776 }
437f3e0b 4777 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4778
7258fa31
SK
4779 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4780 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4781 retry_limit++;
4782 goto retry;
4783 } else
4784 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4785 }
4786
a90ad3c2
ML
4787 return r;
4788}
4789
9a1cddd6 4790/**
4791 * amdgpu_device_has_job_running - check if there is any job in mirror list
4792 *
982a820b 4793 * @adev: amdgpu_device pointer
9a1cddd6 4794 *
4795 * check if there is any job in mirror list
4796 */
4797bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4798{
4799 int i;
4800 struct drm_sched_job *job;
4801
4802 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4803 struct amdgpu_ring *ring = adev->rings[i];
4804
4805 if (!ring || !ring->sched.thread)
4806 continue;
4807
4808 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4809 job = list_first_entry_or_null(&ring->sched.pending_list,
4810 struct drm_sched_job, list);
9a1cddd6 4811 spin_unlock(&ring->sched.job_list_lock);
4812 if (job)
4813 return true;
4814 }
4815 return false;
4816}
4817
12938fad
CK
4818/**
4819 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4820 *
982a820b 4821 * @adev: amdgpu_device pointer
12938fad
CK
4822 *
4823 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4824 * a hung GPU.
4825 */
4826bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4827{
12938fad 4828
3ba7b418
AG
4829 if (amdgpu_gpu_recovery == 0)
4830 goto disabled;
4831
1a11a65d
YC
4832 /* Skip soft reset check in fatal error mode */
4833 if (!amdgpu_ras_is_poison_mode_supported(adev))
4834 return true;
4835
3ba7b418
AG
4836 if (amdgpu_sriov_vf(adev))
4837 return true;
4838
4839 if (amdgpu_gpu_recovery == -1) {
4840 switch (adev->asic_type) {
b3523c45
AD
4841#ifdef CONFIG_DRM_AMDGPU_SI
4842 case CHIP_VERDE:
4843 case CHIP_TAHITI:
4844 case CHIP_PITCAIRN:
4845 case CHIP_OLAND:
4846 case CHIP_HAINAN:
4847#endif
4848#ifdef CONFIG_DRM_AMDGPU_CIK
4849 case CHIP_KAVERI:
4850 case CHIP_KABINI:
4851 case CHIP_MULLINS:
4852#endif
4853 case CHIP_CARRIZO:
4854 case CHIP_STONEY:
4855 case CHIP_CYAN_SKILLFISH:
3ba7b418 4856 goto disabled;
b3523c45
AD
4857 default:
4858 break;
3ba7b418 4859 }
12938fad
CK
4860 }
4861
4862 return true;
3ba7b418
AG
4863
4864disabled:
aac89168 4865 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4866 return false;
12938fad
CK
4867}
4868
5c03e584
FX
4869int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4870{
47fc644f
SS
4871 u32 i;
4872 int ret = 0;
5c03e584 4873
47fc644f 4874 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4875
47fc644f 4876 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4877
47fc644f
SS
4878 /* disable BM */
4879 pci_clear_master(adev->pdev);
5c03e584 4880
47fc644f 4881 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4882
47fc644f
SS
4883 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4884 dev_info(adev->dev, "GPU smu mode1 reset\n");
4885 ret = amdgpu_dpm_mode1_reset(adev);
4886 } else {
4887 dev_info(adev->dev, "GPU psp mode1 reset\n");
4888 ret = psp_gpu_reset(adev);
4889 }
5c03e584 4890
47fc644f 4891 if (ret)
2c0f880a 4892 goto mode1_reset_failed;
5c03e584 4893
47fc644f 4894 amdgpu_device_load_pci_state(adev->pdev);
15c5c5f5
LL
4895 ret = amdgpu_psp_wait_for_bootloader(adev);
4896 if (ret)
2c0f880a 4897 goto mode1_reset_failed;
5c03e584 4898
47fc644f
SS
4899 /* wait for asic to come out of reset */
4900 for (i = 0; i < adev->usec_timeout; i++) {
4901 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4902
47fc644f
SS
4903 if (memsize != 0xffffffff)
4904 break;
4905 udelay(1);
4906 }
5c03e584 4907
2c0f880a
HZ
4908 if (i >= adev->usec_timeout) {
4909 ret = -ETIMEDOUT;
4910 goto mode1_reset_failed;
4911 }
4912
47fc644f 4913 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
15c5c5f5 4914
2c0f880a
HZ
4915 return 0;
4916
4917mode1_reset_failed:
4918 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 4919 return ret;
5c03e584 4920}
5c6dd71e 4921
e3c1b071 4922int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4923 struct amdgpu_reset_context *reset_context)
26bc5340 4924{
5c1e6fa4 4925 int i, r = 0;
04442bf7
LL
4926 struct amdgpu_job *job = NULL;
4927 bool need_full_reset =
4928 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4929
4930 if (reset_context->reset_req_dev == adev)
4931 job = reset_context->job;
71182665 4932
b602ca5f
TZ
4933 if (amdgpu_sriov_vf(adev)) {
4934 /* stop the data exchange thread */
4935 amdgpu_virt_fini_data_exchange(adev);
4936 }
4937
9e225fb9
AG
4938 amdgpu_fence_driver_isr_toggle(adev, true);
4939
71182665 4940 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4941 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4942 struct amdgpu_ring *ring = adev->rings[i];
4943
51687759 4944 if (!ring || !ring->sched.thread)
0875dc9e 4945 continue;
5740682e 4946
b8920e1e
SS
4947 /* Clear job fence from fence drv to avoid force_completion
4948 * leave NULL and vm flush fence in fence drv
4949 */
5c1e6fa4 4950 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4951
2f9d4084
ML
4952 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4953 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4954 }
d38ceaf9 4955
9e225fb9
AG
4956 amdgpu_fence_driver_isr_toggle(adev, false);
4957
ff99849b 4958 if (job && job->vm)
222b5f04
AG
4959 drm_sched_increase_karma(&job->base);
4960
04442bf7 4961 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 4962 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 4963 if (r == -EOPNOTSUPP)
404b277b
LL
4964 r = 0;
4965 else
04442bf7
LL
4966 return r;
4967
1d721ed6 4968 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4969 if (!amdgpu_sriov_vf(adev)) {
4970
4971 if (!need_full_reset)
4972 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4973
360cd081
LG
4974 if (!need_full_reset && amdgpu_gpu_recovery &&
4975 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4976 amdgpu_device_ip_pre_soft_reset(adev);
4977 r = amdgpu_device_ip_soft_reset(adev);
4978 amdgpu_device_ip_post_soft_reset(adev);
4979 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4980 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4981 need_full_reset = true;
4982 }
4983 }
4984
4985 if (need_full_reset)
4986 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4987 if (need_full_reset)
4988 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4989 else
4990 clear_bit(AMDGPU_NEED_FULL_RESET,
4991 &reset_context->flags);
26bc5340
AG
4992 }
4993
4994 return r;
4995}
4996
15fd09a0
SA
4997static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4998{
15fd09a0
SA
4999 int i;
5000
38a15ad9 5001 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
5002
5003 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
5004 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
5005 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
5006 adev->reset_dump_reg_value[i]);
15fd09a0
SA
5007 }
5008
5009 return 0;
5010}
5011
a7691785
AA
5012#ifndef CONFIG_DEV_COREDUMP
5013static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
5014 struct amdgpu_reset_context *reset_context)
5015{
5016}
5017#else
3d8785f6
SA
5018static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
5019 size_t count, void *data, size_t datalen)
5020{
5021 struct drm_printer p;
a7691785 5022 struct amdgpu_coredump_info *coredump = data;
3d8785f6
SA
5023 struct drm_print_iterator iter;
5024 int i;
5025
5026 iter.data = buffer;
5027 iter.offset = 0;
5028 iter.start = offset;
5029 iter.remain = count;
5030
5031 p = drm_coredump_printer(&iter);
5032
5033 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
5034 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
5035 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
a7691785
AA
5036 drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, coredump->reset_time.tv_nsec);
5037 if (coredump->reset_task_info.pid)
3d8785f6 5038 drm_printf(&p, "process_name: %s PID: %d\n",
a7691785
AA
5039 coredump->reset_task_info.process_name,
5040 coredump->reset_task_info.pid);
3d8785f6 5041
a7691785 5042 if (coredump->reset_vram_lost)
3d8785f6 5043 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
a7691785 5044 if (coredump->adev->num_regs) {
3d8785f6
SA
5045 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
5046
a7691785 5047 for (i = 0; i < coredump->adev->num_regs; i++)
3d8785f6 5048 drm_printf(&p, "0x%08x: 0x%08x\n",
a7691785
AA
5049 coredump->adev->reset_dump_reg_list[i],
5050 coredump->adev->reset_dump_reg_value[i]);
3d8785f6
SA
5051 }
5052
5053 return count - iter.remain;
5054}
5055
5056static void amdgpu_devcoredump_free(void *data)
5057{
a7691785 5058 kfree(data);
3d8785f6
SA
5059}
5060
a7691785
AA
5061static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
5062 struct amdgpu_reset_context *reset_context)
3d8785f6 5063{
a7691785 5064 struct amdgpu_coredump_info *coredump;
3d8785f6
SA
5065 struct drm_device *dev = adev_to_drm(adev);
5066
a7691785
AA
5067 coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
5068
5069 if (!coredump) {
5070 DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__);
5071 return;
5072 }
5073
5074 coredump->reset_vram_lost = vram_lost;
5075
5076 if (reset_context->job && reset_context->job->vm)
5077 coredump->reset_task_info = reset_context->job->vm->task_info;
5078
5079 coredump->adev = adev;
5080
5081 ktime_get_ts64(&coredump->reset_time);
5082
5083 dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
3d8785f6
SA
5084 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
5085}
5086#endif
5087
04442bf7
LL
5088int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5089 struct amdgpu_reset_context *reset_context)
26bc5340
AG
5090{
5091 struct amdgpu_device *tmp_adev = NULL;
04442bf7 5092 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 5093 int r = 0;
f5c7e779 5094 bool gpu_reset_for_dev_remove = 0;
26bc5340 5095
04442bf7
LL
5096 /* Try reset handler method first */
5097 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5098 reset_list);
15fd09a0 5099 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
5100
5101 reset_context->reset_device_list = device_list_handle;
04442bf7 5102 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 5103 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5104 if (r == -EOPNOTSUPP)
404b277b
LL
5105 r = 0;
5106 else
04442bf7
LL
5107 return r;
5108
5109 /* Reset handler not implemented, use the default method */
5110 need_full_reset =
5111 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5112 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5113
f5c7e779
YC
5114 gpu_reset_for_dev_remove =
5115 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5116 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5117
26bc5340 5118 /*
655ce9cb 5119 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
5120 * to allow proper links negotiation in FW (within 1 sec)
5121 */
7ac71382 5122 if (!skip_hw_reset && need_full_reset) {
655ce9cb 5123 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 5124 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 5125 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 5126 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 5127 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
5128 r = -EALREADY;
5129 } else
5130 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 5131
041a62bc 5132 if (r) {
aac89168 5133 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 5134 r, adev_to_drm(tmp_adev)->unique);
19349072 5135 goto out;
ce316fa5
LM
5136 }
5137 }
5138
041a62bc
AG
5139 /* For XGMI wait for all resets to complete before proceed */
5140 if (!r) {
655ce9cb 5141 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
5142 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5143 flush_work(&tmp_adev->xgmi_reset_work);
5144 r = tmp_adev->asic_reset_res;
5145 if (r)
5146 break;
ce316fa5
LM
5147 }
5148 }
5149 }
ce316fa5 5150 }
26bc5340 5151
43c4d576 5152 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 5153 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 5154 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
5155 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
5156 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
5157 }
5158
00eaa571 5159 amdgpu_ras_intr_cleared();
43c4d576 5160 }
00eaa571 5161
f5c7e779
YC
5162 /* Since the mode1 reset affects base ip blocks, the
5163 * phase1 ip blocks need to be resumed. Otherwise there
5164 * will be a BIOS signature error and the psp bootloader
5165 * can't load kdb on the next amdgpu install.
5166 */
5167 if (gpu_reset_for_dev_remove) {
5168 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5169 amdgpu_device_ip_resume_phase1(tmp_adev);
5170
5171 goto end;
5172 }
5173
655ce9cb 5174 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
5175 if (need_full_reset) {
5176 /* post card */
e3c1b071 5177 r = amdgpu_device_asic_init(tmp_adev);
5178 if (r) {
aac89168 5179 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5180 } else {
26bc5340 5181 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 5182
26bc5340
AG
5183 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5184 if (r)
5185 goto out;
5186
5187 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
a7691785
AA
5188
5189 amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5190
26bc5340 5191 if (vram_lost) {
77e7f829 5192 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5193 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5194 }
5195
26bc5340
AG
5196 r = amdgpu_device_fw_loading(tmp_adev);
5197 if (r)
5198 return r;
5199
c45e38f2
LL
5200 r = amdgpu_xcp_restore_partition_mode(
5201 tmp_adev->xcp_mgr);
5202 if (r)
5203 goto out;
5204
26bc5340
AG
5205 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5206 if (r)
5207 goto out;
5208
5209 if (vram_lost)
5210 amdgpu_device_fill_reset_magic(tmp_adev);
5211
fdafb359
EQ
5212 /*
5213 * Add this ASIC as tracked as reset was already
5214 * complete successfully.
5215 */
5216 amdgpu_register_gpu_instance(tmp_adev);
5217
04442bf7
LL
5218 if (!reset_context->hive &&
5219 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5220 amdgpu_xgmi_add_device(tmp_adev);
5221
7c04ca50 5222 r = amdgpu_device_ip_late_init(tmp_adev);
5223 if (r)
5224 goto out;
5225
087451f3 5226 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5227
e8fbaf03
GC
5228 /*
5229 * The GPU enters bad state once faulty pages
5230 * by ECC has reached the threshold, and ras
5231 * recovery is scheduled next. So add one check
5232 * here to break recovery if it indeed exceeds
5233 * bad page threshold, and remind user to
5234 * retire this GPU or setting one bigger
5235 * bad_page_threshold value to fix this once
5236 * probing driver again.
5237 */
11003c68 5238 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5239 /* must succeed. */
5240 amdgpu_ras_resume(tmp_adev);
5241 } else {
5242 r = -EINVAL;
5243 goto out;
5244 }
e79a04d5 5245
26bc5340 5246 /* Update PSP FW topology after reset */
04442bf7
LL
5247 if (reset_context->hive &&
5248 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5249 r = amdgpu_xgmi_update_topology(
5250 reset_context->hive, tmp_adev);
26bc5340
AG
5251 }
5252 }
5253
26bc5340
AG
5254out:
5255 if (!r) {
5256 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5257 r = amdgpu_ib_ring_tests(tmp_adev);
5258 if (r) {
5259 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5260 need_full_reset = true;
5261 r = -EAGAIN;
5262 goto end;
5263 }
5264 }
5265
5266 if (!r)
5267 r = amdgpu_device_recover_vram(tmp_adev);
5268 else
5269 tmp_adev->asic_reset_res = r;
5270 }
5271
5272end:
04442bf7
LL
5273 if (need_full_reset)
5274 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5275 else
5276 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5277 return r;
5278}
5279
e923be99 5280static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5281{
5740682e 5282
a3a09142
AD
5283 switch (amdgpu_asic_reset_method(adev)) {
5284 case AMD_RESET_METHOD_MODE1:
5285 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5286 break;
5287 case AMD_RESET_METHOD_MODE2:
5288 adev->mp1_state = PP_MP1_STATE_RESET;
5289 break;
5290 default:
5291 adev->mp1_state = PP_MP1_STATE_NONE;
5292 break;
5293 }
26bc5340 5294}
d38ceaf9 5295
e923be99 5296static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5297{
89041940 5298 amdgpu_vf_error_trans_all(adev);
a3a09142 5299 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5300}
5301
3f12acc8
EQ
5302static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5303{
5304 struct pci_dev *p = NULL;
5305
5306 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5307 adev->pdev->bus->number, 1);
5308 if (p) {
5309 pm_runtime_enable(&(p->dev));
5310 pm_runtime_resume(&(p->dev));
5311 }
b85e285e
YY
5312
5313 pci_dev_put(p);
3f12acc8
EQ
5314}
5315
5316static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5317{
5318 enum amd_reset_method reset_method;
5319 struct pci_dev *p = NULL;
5320 u64 expires;
5321
5322 /*
5323 * For now, only BACO and mode1 reset are confirmed
5324 * to suffer the audio issue without proper suspended.
5325 */
5326 reset_method = amdgpu_asic_reset_method(adev);
5327 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5328 (reset_method != AMD_RESET_METHOD_MODE1))
5329 return -EINVAL;
5330
5331 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5332 adev->pdev->bus->number, 1);
5333 if (!p)
5334 return -ENODEV;
5335
5336 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5337 if (!expires)
5338 /*
5339 * If we cannot get the audio device autosuspend delay,
5340 * a fixed 4S interval will be used. Considering 3S is
5341 * the audio controller default autosuspend delay setting.
5342 * 4S used here is guaranteed to cover that.
5343 */
54b7feb9 5344 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5345
5346 while (!pm_runtime_status_suspended(&(p->dev))) {
5347 if (!pm_runtime_suspend(&(p->dev)))
5348 break;
5349
5350 if (expires < ktime_get_mono_fast_ns()) {
5351 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5352 pci_dev_put(p);
3f12acc8
EQ
5353 /* TODO: abort the succeeding gpu reset? */
5354 return -ETIMEDOUT;
5355 }
5356 }
5357
5358 pm_runtime_disable(&(p->dev));
5359
b85e285e 5360 pci_dev_put(p);
3f12acc8
EQ
5361 return 0;
5362}
5363
d193b12b 5364static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5365{
5366 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5367
5368#if defined(CONFIG_DEBUG_FS)
5369 if (!amdgpu_sriov_vf(adev))
5370 cancel_work(&adev->reset_work);
5371#endif
5372
5373 if (adev->kfd.dev)
5374 cancel_work(&adev->kfd.reset_work);
5375
5376 if (amdgpu_sriov_vf(adev))
5377 cancel_work(&adev->virt.flr_work);
5378
5379 if (con && adev->ras_enabled)
5380 cancel_work(&con->recovery_work);
5381
5382}
5383
26bc5340 5384/**
6e9c65f7 5385 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5386 *
982a820b 5387 * @adev: amdgpu_device pointer
26bc5340 5388 * @job: which job trigger hang
80bd2de1 5389 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5390 *
5391 * Attempt to reset the GPU if it has hung (all asics).
5392 * Attempt to do soft-reset or full-reset and reinitialize Asic
5393 * Returns 0 for success or an error on failure.
5394 */
5395
cf727044 5396int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5397 struct amdgpu_job *job,
5398 struct amdgpu_reset_context *reset_context)
26bc5340 5399{
1d721ed6 5400 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5401 bool job_signaled = false;
26bc5340 5402 struct amdgpu_hive_info *hive = NULL;
26bc5340 5403 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5404 int i, r = 0;
bb5c7235 5405 bool need_emergency_restart = false;
3f12acc8 5406 bool audio_suspended = false;
f5c7e779
YC
5407 bool gpu_reset_for_dev_remove = false;
5408
5409 gpu_reset_for_dev_remove =
5410 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5411 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5412
6e3cd2a9 5413 /*
bb5c7235
WS
5414 * Special case: RAS triggered and full reset isn't supported
5415 */
5416 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5417
d5ea093e
AG
5418 /*
5419 * Flush RAM to disk so that after reboot
5420 * the user can read log and see why the system rebooted.
5421 */
bb5c7235 5422 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5423 DRM_WARN("Emergency reboot.");
5424
5425 ksys_sync_helper();
5426 emergency_restart();
5427 }
5428
b823821f 5429 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5430 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5431
175ac6ec
ZL
5432 if (!amdgpu_sriov_vf(adev))
5433 hive = amdgpu_get_xgmi_hive(adev);
681260df 5434 if (hive)
53b3f8f4 5435 mutex_lock(&hive->hive_lock);
26bc5340 5436
f1549c09
LG
5437 reset_context->job = job;
5438 reset_context->hive = hive;
9e94d22c
EQ
5439 /*
5440 * Build list of devices to reset.
5441 * In case we are in XGMI hive mode, resort the device list
5442 * to put adev in the 1st position.
5443 */
5444 INIT_LIST_HEAD(&device_list);
175ac6ec 5445 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5446 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5447 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5448 if (gpu_reset_for_dev_remove && adev->shutdown)
5449 tmp_adev->shutdown = true;
5450 }
655ce9cb 5451 if (!list_is_first(&adev->reset_list, &device_list))
5452 list_rotate_to_front(&adev->reset_list, &device_list);
5453 device_list_handle = &device_list;
26bc5340 5454 } else {
655ce9cb 5455 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5456 device_list_handle = &device_list;
5457 }
5458
e923be99
AG
5459 /* We need to lock reset domain only once both for XGMI and single device */
5460 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5461 reset_list);
3675c2f2 5462 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5463
1d721ed6 5464 /* block all schedulers and reset given job's ring */
655ce9cb 5465 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5466
e923be99 5467 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5468
3f12acc8
EQ
5469 /*
5470 * Try to put the audio codec into suspend state
5471 * before gpu reset started.
5472 *
5473 * Due to the power domain of the graphics device
5474 * is shared with AZ power domain. Without this,
5475 * we may change the audio hardware from behind
5476 * the audio driver's back. That will trigger
5477 * some audio codec errors.
5478 */
5479 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5480 audio_suspended = true;
5481
9e94d22c
EQ
5482 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5483
52fb44cf
EQ
5484 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5485
c004d44e 5486 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5487 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5488
12ffa55d
AG
5489 /*
5490 * Mark these ASICs to be reseted as untracked first
5491 * And add them back after reset completed
5492 */
5493 amdgpu_unregister_gpu_instance(tmp_adev);
5494
163d4cd2 5495 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5496
f1c1314b 5497 /* disable ras on ALL IPs */
bb5c7235 5498 if (!need_emergency_restart &&
b823821f 5499 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5500 amdgpu_ras_suspend(tmp_adev);
5501
1d721ed6
AG
5502 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5503 struct amdgpu_ring *ring = tmp_adev->rings[i];
5504
5505 if (!ring || !ring->sched.thread)
5506 continue;
5507
0b2d2c2e 5508 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5509
bb5c7235 5510 if (need_emergency_restart)
7c6e68c7 5511 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5512 }
8f8c80f4 5513 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5514 }
5515
bb5c7235 5516 if (need_emergency_restart)
7c6e68c7
AG
5517 goto skip_sched_resume;
5518
1d721ed6
AG
5519 /*
5520 * Must check guilty signal here since after this point all old
5521 * HW fences are force signaled.
5522 *
5523 * job->base holds a reference to parent fence
5524 */
f6a3f660 5525 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5526 job_signaled = true;
1d721ed6
AG
5527 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5528 goto skip_hw_reset;
5529 }
5530
26bc5340 5531retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5532 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5533 if (gpu_reset_for_dev_remove) {
5534 /* Workaroud for ASICs need to disable SMC first */
5535 amdgpu_device_smu_fini_early(tmp_adev);
5536 }
f1549c09 5537 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5538 /*TODO Should we stop ?*/
5539 if (r) {
aac89168 5540 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5541 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5542 tmp_adev->asic_reset_res = r;
5543 }
247c7b0d
AG
5544
5545 /*
5546 * Drop all pending non scheduler resets. Scheduler resets
5547 * were already dropped during drm_sched_stop
5548 */
d193b12b 5549 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5550 }
5551
5552 /* Actual ASIC resets if needed.*/
4f30d920 5553 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5554 if (amdgpu_sriov_vf(adev)) {
5555 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5556 if (r)
5557 adev->asic_reset_res = r;
950d6425 5558
28606c4e 5559 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
4e8303cf
LL
5560 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5561 IP_VERSION(9, 4, 2) ||
5562 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
950d6425 5563 amdgpu_ras_resume(adev);
26bc5340 5564 } else {
f1549c09 5565 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5566 if (r && r == -EAGAIN)
26bc5340 5567 goto retry;
f5c7e779
YC
5568
5569 if (!r && gpu_reset_for_dev_remove)
5570 goto recover_end;
26bc5340
AG
5571 }
5572
1d721ed6
AG
5573skip_hw_reset:
5574
26bc5340 5575 /* Post ASIC reset for all devs .*/
655ce9cb 5576 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5577
1d721ed6
AG
5578 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5579 struct amdgpu_ring *ring = tmp_adev->rings[i];
5580
5581 if (!ring || !ring->sched.thread)
5582 continue;
5583
6868a2c4 5584 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5585 }
5586
4e8303cf
LL
5587 if (adev->enable_mes &&
5588 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))
ed67f729
JX
5589 amdgpu_mes_self_test(tmp_adev);
5590
b8920e1e 5591 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5592 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5593
7258fa31
SK
5594 if (tmp_adev->asic_reset_res)
5595 r = tmp_adev->asic_reset_res;
5596
1d721ed6 5597 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5598
5599 if (r) {
5600 /* bad news, how to tell it to userspace ? */
12ffa55d 5601 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5602 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5603 } else {
12ffa55d 5604 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5605 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5606 DRM_WARN("smart shift update failed\n");
26bc5340 5607 }
7c6e68c7 5608 }
26bc5340 5609
7c6e68c7 5610skip_sched_resume:
655ce9cb 5611 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5612 /* unlock kfd: SRIOV would do it separately */
c004d44e 5613 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5614 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5615
5616 /* kfd_post_reset will do nothing if kfd device is not initialized,
5617 * need to bring up kfd here if it's not be initialized before
5618 */
5619 if (!adev->kfd.init_complete)
5620 amdgpu_amdkfd_device_init(adev);
5621
3f12acc8
EQ
5622 if (audio_suspended)
5623 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5624
5625 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5626
5627 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5628 }
5629
f5c7e779 5630recover_end:
e923be99
AG
5631 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5632 reset_list);
5633 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5634
9e94d22c 5635 if (hive) {
9e94d22c 5636 mutex_unlock(&hive->hive_lock);
d95e8e97 5637 amdgpu_put_xgmi_hive(hive);
9e94d22c 5638 }
26bc5340 5639
f287a3c5 5640 if (r)
26bc5340 5641 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5642
5643 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5644 return r;
5645}
5646
e3ecdffa
AD
5647/**
5648 * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
5649 *
5650 * @adev: amdgpu_device pointer
5651 *
5652 * Fetchs and stores in the driver the PCIE capabilities (gen speed
5653 * and lanes) of the slot the device is in. Handles APUs and
5654 * virtualized environments where PCIE config space may not be available.
5655 */
5494d864 5656static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5657{
5d9a6330 5658 struct pci_dev *pdev;
c5313457
HK
5659 enum pci_bus_speed speed_cap, platform_speed_cap;
5660 enum pcie_link_width platform_link_width;
d0dd7f0c 5661
cd474ba0
AD
5662 if (amdgpu_pcie_gen_cap)
5663 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5664
cd474ba0
AD
5665 if (amdgpu_pcie_lane_cap)
5666 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5667
cd474ba0 5668 /* covers APUs as well */
04e85958 5669 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5670 if (adev->pm.pcie_gen_mask == 0)
5671 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5672 if (adev->pm.pcie_mlw_mask == 0)
5673 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5674 return;
cd474ba0 5675 }
d0dd7f0c 5676
c5313457
HK
5677 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5678 return;
5679
dbaa922b
AD
5680 pcie_bandwidth_available(adev->pdev, NULL,
5681 &platform_speed_cap, &platform_link_width);
c5313457 5682
cd474ba0 5683 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5684 /* asic caps */
5685 pdev = adev->pdev;
5686 speed_cap = pcie_get_speed_cap(pdev);
5687 if (speed_cap == PCI_SPEED_UNKNOWN) {
5688 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5689 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5690 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5691 } else {
2b3a1f51
FX
5692 if (speed_cap == PCIE_SPEED_32_0GT)
5693 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5694 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5695 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5696 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5697 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5698 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5699 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5700 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5701 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5702 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5703 else if (speed_cap == PCIE_SPEED_8_0GT)
5704 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5705 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5706 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5707 else if (speed_cap == PCIE_SPEED_5_0GT)
5708 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5709 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5710 else
5711 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5712 }
5713 /* platform caps */
c5313457 5714 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5715 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5716 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5717 } else {
2b3a1f51
FX
5718 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5719 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5720 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5721 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5722 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5723 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5724 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5725 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5726 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5727 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5728 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5729 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5730 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5731 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5732 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5733 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5734 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5735 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5736 else
5737 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5738
cd474ba0
AD
5739 }
5740 }
5741 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5742 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5743 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5744 } else {
c5313457 5745 switch (platform_link_width) {
5d9a6330 5746 case PCIE_LNK_X32:
cd474ba0
AD
5747 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5748 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5749 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5750 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5751 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5752 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5753 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5754 break;
5d9a6330 5755 case PCIE_LNK_X16:
cd474ba0
AD
5756 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5757 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5758 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5759 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5760 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5761 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5762 break;
5d9a6330 5763 case PCIE_LNK_X12:
cd474ba0
AD
5764 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5765 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5766 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5767 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5768 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5769 break;
5d9a6330 5770 case PCIE_LNK_X8:
cd474ba0
AD
5771 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5772 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5773 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5774 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5775 break;
5d9a6330 5776 case PCIE_LNK_X4:
cd474ba0
AD
5777 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5778 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5779 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5780 break;
5d9a6330 5781 case PCIE_LNK_X2:
cd474ba0
AD
5782 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5783 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5784 break;
5d9a6330 5785 case PCIE_LNK_X1:
cd474ba0
AD
5786 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5787 break;
5788 default:
5789 break;
5790 }
d0dd7f0c
AD
5791 }
5792 }
5793}
d38ceaf9 5794
08a2fd23
RE
5795/**
5796 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5797 *
5798 * @adev: amdgpu_device pointer
5799 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5800 *
5801 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5802 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5803 * @peer_adev.
5804 */
5805bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5806 struct amdgpu_device *peer_adev)
5807{
5808#ifdef CONFIG_HSA_AMD_P2P
5809 uint64_t address_mask = peer_adev->dev->dma_mask ?
5810 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5811 resource_size_t aper_limit =
5812 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5813 bool p2p_access =
5814 !adev->gmc.xgmi.connected_to_cpu &&
5815 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5816
5817 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5818 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5819 !(adev->gmc.aper_base & address_mask ||
5820 aper_limit & address_mask));
5821#else
5822 return false;
5823#endif
5824}
5825
361dbd01
AD
5826int amdgpu_device_baco_enter(struct drm_device *dev)
5827{
1348969a 5828 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5829 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5830
6ab68650 5831 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5832 return -ENOTSUPP;
5833
8ab0d6f0 5834 if (ras && adev->ras_enabled &&
acdae216 5835 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5836 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5837
9530273e 5838 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5839}
5840
5841int amdgpu_device_baco_exit(struct drm_device *dev)
5842{
1348969a 5843 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5844 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5845 int ret = 0;
361dbd01 5846
6ab68650 5847 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5848 return -ENOTSUPP;
5849
9530273e
EQ
5850 ret = amdgpu_dpm_baco_exit(adev);
5851 if (ret)
5852 return ret;
7a22677b 5853
8ab0d6f0 5854 if (ras && adev->ras_enabled &&
acdae216 5855 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5856 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5857
1bece222
CL
5858 if (amdgpu_passthrough(adev) &&
5859 adev->nbio.funcs->clear_doorbell_interrupt)
5860 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5861
7a22677b 5862 return 0;
361dbd01 5863}
c9a6b82f
AG
5864
5865/**
5866 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5867 * @pdev: PCI device struct
5868 * @state: PCI channel state
5869 *
5870 * Description: Called when a PCI error is detected.
5871 *
5872 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5873 */
5874pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5875{
5876 struct drm_device *dev = pci_get_drvdata(pdev);
5877 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5878 int i;
c9a6b82f
AG
5879
5880 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5881
6894305c
AG
5882 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5883 DRM_WARN("No support for XGMI hive yet...");
5884 return PCI_ERS_RESULT_DISCONNECT;
5885 }
5886
e17e27f9
GC
5887 adev->pci_channel_state = state;
5888
c9a6b82f
AG
5889 switch (state) {
5890 case pci_channel_io_normal:
5891 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5892 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5893 case pci_channel_io_frozen:
5894 /*
d0fb18b5 5895 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5896 * to GPU during PCI error recovery
5897 */
3675c2f2 5898 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5899 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5900
5901 /*
5902 * Block any work scheduling as we do for regular GPU reset
5903 * for the duration of the recovery
5904 */
5905 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5906 struct amdgpu_ring *ring = adev->rings[i];
5907
5908 if (!ring || !ring->sched.thread)
5909 continue;
5910
5911 drm_sched_stop(&ring->sched, NULL);
5912 }
8f8c80f4 5913 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5914 return PCI_ERS_RESULT_NEED_RESET;
5915 case pci_channel_io_perm_failure:
5916 /* Permanent error, prepare for device removal */
5917 return PCI_ERS_RESULT_DISCONNECT;
5918 }
5919
5920 return PCI_ERS_RESULT_NEED_RESET;
5921}
5922
5923/**
5924 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5925 * @pdev: pointer to PCI device
5926 */
5927pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5928{
5929
5930 DRM_INFO("PCI error: mmio enabled callback!!\n");
5931
5932 /* TODO - dump whatever for debugging purposes */
5933
5934 /* This called only if amdgpu_pci_error_detected returns
5935 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5936 * works, no need to reset slot.
5937 */
5938
5939 return PCI_ERS_RESULT_RECOVERED;
5940}
5941
5942/**
5943 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5944 * @pdev: PCI device struct
5945 *
5946 * Description: This routine is called by the pci error recovery
5947 * code after the PCI slot has been reset, just before we
5948 * should resume normal operations.
5949 */
5950pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5951{
5952 struct drm_device *dev = pci_get_drvdata(pdev);
5953 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5954 int r, i;
04442bf7 5955 struct amdgpu_reset_context reset_context;
362c7b91 5956 u32 memsize;
7ac71382 5957 struct list_head device_list;
c9a6b82f
AG
5958
5959 DRM_INFO("PCI error: slot reset callback!!\n");
5960
04442bf7
LL
5961 memset(&reset_context, 0, sizeof(reset_context));
5962
7ac71382 5963 INIT_LIST_HEAD(&device_list);
655ce9cb 5964 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5965
362c7b91
AG
5966 /* wait for asic to come out of reset */
5967 msleep(500);
5968
7ac71382 5969 /* Restore PCI confspace */
c1dd4aa6 5970 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5971
362c7b91
AG
5972 /* confirm ASIC came out of reset */
5973 for (i = 0; i < adev->usec_timeout; i++) {
5974 memsize = amdgpu_asic_get_config_memsize(adev);
5975
5976 if (memsize != 0xffffffff)
5977 break;
5978 udelay(1);
5979 }
5980 if (memsize == 0xffffffff) {
5981 r = -ETIME;
5982 goto out;
5983 }
5984
04442bf7
LL
5985 reset_context.method = AMD_RESET_METHOD_NONE;
5986 reset_context.reset_req_dev = adev;
5987 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5988 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5989
7afefb81 5990 adev->no_hw_access = true;
04442bf7 5991 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5992 adev->no_hw_access = false;
c9a6b82f
AG
5993 if (r)
5994 goto out;
5995
04442bf7 5996 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5997
5998out:
c9a6b82f 5999 if (!r) {
c1dd4aa6
AG
6000 if (amdgpu_device_cache_pci_state(adev->pdev))
6001 pci_restore_state(adev->pdev);
6002
c9a6b82f
AG
6003 DRM_INFO("PCIe error recovery succeeded\n");
6004 } else {
6005 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
6006 amdgpu_device_unset_mp1_state(adev);
6007 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
6008 }
6009
6010 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6011}
6012
6013/**
6014 * amdgpu_pci_resume() - resume normal ops after PCI reset
6015 * @pdev: pointer to PCI device
6016 *
6017 * Called when the error recovery driver tells us that its
505199a3 6018 * OK to resume normal operation.
c9a6b82f
AG
6019 */
6020void amdgpu_pci_resume(struct pci_dev *pdev)
6021{
6022 struct drm_device *dev = pci_get_drvdata(pdev);
6023 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 6024 int i;
c9a6b82f 6025
c9a6b82f
AG
6026
6027 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 6028
e17e27f9
GC
6029 /* Only continue execution for the case of pci_channel_io_frozen */
6030 if (adev->pci_channel_state != pci_channel_io_frozen)
6031 return;
6032
acd89fca
AG
6033 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6034 struct amdgpu_ring *ring = adev->rings[i];
6035
6036 if (!ring || !ring->sched.thread)
6037 continue;
6038
acd89fca
AG
6039 drm_sched_start(&ring->sched, true);
6040 }
6041
e923be99
AG
6042 amdgpu_device_unset_mp1_state(adev);
6043 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 6044}
c1dd4aa6
AG
6045
6046bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6047{
6048 struct drm_device *dev = pci_get_drvdata(pdev);
6049 struct amdgpu_device *adev = drm_to_adev(dev);
6050 int r;
6051
6052 r = pci_save_state(pdev);
6053 if (!r) {
6054 kfree(adev->pci_state);
6055
6056 adev->pci_state = pci_store_saved_state(pdev);
6057
6058 if (!adev->pci_state) {
6059 DRM_ERROR("Failed to store PCI saved state");
6060 return false;
6061 }
6062 } else {
6063 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6064 return false;
6065 }
6066
6067 return true;
6068}
6069
6070bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6071{
6072 struct drm_device *dev = pci_get_drvdata(pdev);
6073 struct amdgpu_device *adev = drm_to_adev(dev);
6074 int r;
6075
6076 if (!adev->pci_state)
6077 return false;
6078
6079 r = pci_load_saved_state(pdev, adev->pci_state);
6080
6081 if (!r) {
6082 pci_restore_state(pdev);
6083 } else {
6084 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6085 return false;
6086 }
6087
6088 return true;
6089}
6090
810085dd
EH
6091void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6092 struct amdgpu_ring *ring)
6093{
6094#ifdef CONFIG_X86_64
b818a5d3 6095 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6096 return;
6097#endif
6098 if (adev->gmc.xgmi.connected_to_cpu)
6099 return;
6100
6101 if (ring && ring->funcs->emit_hdp_flush)
6102 amdgpu_ring_emit_hdp_flush(ring);
6103 else
6104 amdgpu_asic_flush_hdp(adev, ring);
6105}
c1dd4aa6 6106
810085dd
EH
6107void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6108 struct amdgpu_ring *ring)
6109{
6110#ifdef CONFIG_X86_64
b818a5d3 6111 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6112 return;
6113#endif
6114 if (adev->gmc.xgmi.connected_to_cpu)
6115 return;
c1dd4aa6 6116
810085dd
EH
6117 amdgpu_asic_invalidate_hdp(adev, ring);
6118}
34f3a4a9 6119
89a7a870
AG
6120int amdgpu_in_reset(struct amdgpu_device *adev)
6121{
6122 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
6123}
6124
34f3a4a9
LY
6125/**
6126 * amdgpu_device_halt() - bring hardware to some kind of halt state
6127 *
6128 * @adev: amdgpu_device pointer
6129 *
6130 * Bring hardware to some kind of halt state so that no one can touch it
6131 * any more. It will help to maintain error context when error occurred.
6132 * Compare to a simple hang, the system will keep stable at least for SSH
6133 * access. Then it should be trivial to inspect the hardware state and
6134 * see what's going on. Implemented as following:
6135 *
6136 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6137 * clears all CPU mappings to device, disallows remappings through page faults
6138 * 2. amdgpu_irq_disable_all() disables all interrupts
6139 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6140 * 4. set adev->no_hw_access to avoid potential crashes after setp 5
6141 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6142 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6143 * flush any in flight DMA operations
6144 */
6145void amdgpu_device_halt(struct amdgpu_device *adev)
6146{
6147 struct pci_dev *pdev = adev->pdev;
e0f943b4 6148 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 6149
2c1c7ba4 6150 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
6151 drm_dev_unplug(ddev);
6152
6153 amdgpu_irq_disable_all(adev);
6154
6155 amdgpu_fence_driver_hw_fini(adev);
6156
6157 adev->no_hw_access = true;
6158
6159 amdgpu_device_unmap_mmio(adev);
6160
6161 pci_disable_device(pdev);
6162 pci_wait_for_pending_transaction(pdev);
6163}
86700a40
XD
6164
6165u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6166 u32 reg)
6167{
6168 unsigned long flags, address, data;
6169 u32 r;
6170
6171 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6172 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6173
6174 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6175 WREG32(address, reg * 4);
6176 (void)RREG32(address);
6177 r = RREG32(data);
6178 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6179 return r;
6180}
6181
6182void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6183 u32 reg, u32 v)
6184{
6185 unsigned long flags, address, data;
6186
6187 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6188 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6189
6190 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6191 WREG32(address, reg * 4);
6192 (void)RREG32(address);
6193 WREG32(data, v);
6194 (void)RREG32(data);
6195 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6196}
68ce8b24
CK
6197
6198/**
6199 * amdgpu_device_switch_gang - switch to a new gang
6200 * @adev: amdgpu_device pointer
6201 * @gang: the gang to switch to
6202 *
6203 * Try to switch to a new gang.
6204 * Returns: NULL if we switched to the new gang or a reference to the current
6205 * gang leader.
6206 */
6207struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6208 struct dma_fence *gang)
6209{
6210 struct dma_fence *old = NULL;
6211
6212 do {
6213 dma_fence_put(old);
6214 rcu_read_lock();
6215 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6216 rcu_read_unlock();
6217
6218 if (old == gang)
6219 break;
6220
6221 if (!dma_fence_is_signaled(old))
6222 return old;
6223
6224 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6225 old, gang) != old);
6226
6227 dma_fence_put(old);
6228 return NULL;
6229}
220c8cc8
AD
6230
6231bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6232{
6233 switch (adev->asic_type) {
6234#ifdef CONFIG_DRM_AMDGPU_SI
6235 case CHIP_HAINAN:
6236#endif
6237 case CHIP_TOPAZ:
6238 /* chips with no display hardware */
6239 return false;
6240#ifdef CONFIG_DRM_AMDGPU_SI
6241 case CHIP_TAHITI:
6242 case CHIP_PITCAIRN:
6243 case CHIP_VERDE:
6244 case CHIP_OLAND:
6245#endif
6246#ifdef CONFIG_DRM_AMDGPU_CIK
6247 case CHIP_BONAIRE:
6248 case CHIP_HAWAII:
6249 case CHIP_KAVERI:
6250 case CHIP_KABINI:
6251 case CHIP_MULLINS:
6252#endif
6253 case CHIP_TONGA:
6254 case CHIP_FIJI:
6255 case CHIP_POLARIS10:
6256 case CHIP_POLARIS11:
6257 case CHIP_POLARIS12:
6258 case CHIP_VEGAM:
6259 case CHIP_CARRIZO:
6260 case CHIP_STONEY:
6261 /* chips with display hardware */
6262 return true;
6263 default:
6264 /* IP discovery */
4e8303cf 6265 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
220c8cc8
AD
6266 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6267 return false;
6268 return true;
6269 }
6270}
81283fee
JZ
6271
6272uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6273 uint32_t inst, uint32_t reg_addr, char reg_name[],
6274 uint32_t expected_value, uint32_t mask)
6275{
6276 uint32_t ret = 0;
6277 uint32_t old_ = 0;
6278 uint32_t tmp_ = RREG32(reg_addr);
6279 uint32_t loop = adev->usec_timeout;
6280
6281 while ((tmp_ & (mask)) != (expected_value)) {
6282 if (old_ != tmp_) {
6283 loop = adev->usec_timeout;
6284 old_ = tmp_;
6285 } else
6286 udelay(1);
6287 tmp_ = RREG32(reg_addr);
6288 loop--;
6289 if (!loop) {
6290 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
6291 inst, reg_name, (uint32_t)expected_value,
6292 (uint32_t)(tmp_ & (mask)));
6293 ret = -ETIMEDOUT;
6294 break;
6295 }
6296 }
6297 return ret;
6298}