drm/amdgpu: route ioctls on primary node of XCPs to primary device
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
CommitLineData
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
3d8785f6
SA
35#include <linux/devcoredump.h>
36#include <generated/utsrelease.h>
08a2fd23 37#include <linux/pci-p2pdma.h>
d37a3929 38#include <linux/apple-gmux.h>
fdf2f6c5 39
b7cdb41e 40#include <drm/drm_aperture.h>
4562236b 41#include <drm/drm_atomic_helper.h>
973ad627 42#include <drm/drm_crtc_helper.h>
45b64fd9 43#include <drm/drm_fb_helper.h>
fcd70cd3 44#include <drm/drm_probe_helper.h>
d38ceaf9
AD
45#include <drm/amdgpu_drm.h>
46#include <linux/vgaarb.h>
47#include <linux/vga_switcheroo.h>
48#include <linux/efi.h>
49#include "amdgpu.h"
f4b373f4 50#include "amdgpu_trace.h"
d38ceaf9
AD
51#include "amdgpu_i2c.h"
52#include "atom.h"
53#include "amdgpu_atombios.h"
a5bde2f9 54#include "amdgpu_atomfirmware.h"
d0dd7f0c 55#include "amd_pcie.h"
33f34802
KW
56#ifdef CONFIG_DRM_AMDGPU_SI
57#include "si.h"
58#endif
a2e73f56
AD
59#ifdef CONFIG_DRM_AMDGPU_CIK
60#include "cik.h"
61#endif
aaa36a97 62#include "vi.h"
460826e6 63#include "soc15.h"
0a5b8c7b 64#include "nv.h"
d38ceaf9 65#include "bif/bif_4_1_d.h"
bec86378 66#include <linux/firmware.h>
89041940 67#include "amdgpu_vf_error.h"
d38ceaf9 68
ba997709 69#include "amdgpu_amdkfd.h"
d2f52ac8 70#include "amdgpu_pm.h"
d38ceaf9 71
5183411b 72#include "amdgpu_xgmi.h"
c030f2e4 73#include "amdgpu_ras.h"
9c7c85f7 74#include "amdgpu_pmu.h"
bd607166 75#include "amdgpu_fru_eeprom.h"
04442bf7 76#include "amdgpu_reset.h"
5183411b 77
d5ea093e 78#include <linux/suspend.h>
c6a6e2db 79#include <drm/task_barrier.h>
3f12acc8 80#include <linux/pm_runtime.h>
d5ea093e 81
f89f8c6b
AG
82#include <drm/drm_drv.h>
83
3ad5dcfe
KHF
84#if IS_ENABLED(CONFIG_X86)
85#include <asm/intel-family.h>
86#endif
87
e2a75f88 88MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 89MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 90MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 91MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 92MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 93MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 94MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 95
2dc80b00 96#define AMDGPU_RESUME_MS 2000
7258fa31
SK
97#define AMDGPU_MAX_RETRY_LIMIT 2
98#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 99
b7cdb41e
ML
100static const struct drm_driver amdgpu_kms_driver;
101
050091ab 102const char *amdgpu_asic_name[] = {
da69c161
KW
103 "TAHITI",
104 "PITCAIRN",
105 "VERDE",
106 "OLAND",
107 "HAINAN",
d38ceaf9
AD
108 "BONAIRE",
109 "KAVERI",
110 "KABINI",
111 "HAWAII",
112 "MULLINS",
113 "TOPAZ",
114 "TONGA",
48299f95 115 "FIJI",
d38ceaf9 116 "CARRIZO",
139f4917 117 "STONEY",
2cc0c0b5
FC
118 "POLARIS10",
119 "POLARIS11",
c4642a47 120 "POLARIS12",
48ff108d 121 "VEGAM",
d4196f01 122 "VEGA10",
8fab806a 123 "VEGA12",
956fcddc 124 "VEGA20",
2ca8a5d2 125 "RAVEN",
d6c3b24e 126 "ARCTURUS",
1eee4228 127 "RENOIR",
d46b417a 128 "ALDEBARAN",
852a6626 129 "NAVI10",
d0f56dc2 130 "CYAN_SKILLFISH",
87dbad02 131 "NAVI14",
9802f5d7 132 "NAVI12",
ccaf72d3 133 "SIENNA_CICHLID",
ddd8fbe7 134 "NAVY_FLOUNDER",
4f1e9a76 135 "VANGOGH",
a2468e04 136 "DIMGREY_CAVEFISH",
6f169591 137 "BEIGE_GOBY",
ee9236b7 138 "YELLOW_CARP",
3ae695d6 139 "IP DISCOVERY",
d38ceaf9
AD
140 "LAST",
141};
142
dcea6e65
KR
143/**
144 * DOC: pcie_replay_count
145 *
146 * The amdgpu driver provides a sysfs API for reporting the total number
147 * of PCIe replays (NAKs)
148 * The file pcie_replay_count is used for this and returns the total
149 * number of replays as a sum of the NAKs generated and NAKs received
150 */
151
152static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 156 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
157 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158
36000c7a 159 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
160}
161
162static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
163 amdgpu_device_get_pcie_replay_count, NULL);
164
5494d864
AD
165static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
166
bd607166
KR
167/**
168 * DOC: product_name
169 *
170 * The amdgpu driver provides a sysfs API for reporting the product name
171 * for the device
2c496a6c 172 * The file product_name is used for this and returns the product name
bd607166
KR
173 * as returned from the FRU.
174 * NOTE: This is only available for certain server cards
175 */
176
177static ssize_t amdgpu_device_get_product_name(struct device *dev,
178 struct device_attribute *attr, char *buf)
179{
180 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 181 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 182
36000c7a 183 return sysfs_emit(buf, "%s\n", adev->product_name);
bd607166
KR
184}
185
186static DEVICE_ATTR(product_name, S_IRUGO,
187 amdgpu_device_get_product_name, NULL);
188
189/**
190 * DOC: product_number
191 *
192 * The amdgpu driver provides a sysfs API for reporting the part number
193 * for the device
2c496a6c 194 * The file product_number is used for this and returns the part number
bd607166
KR
195 * as returned from the FRU.
196 * NOTE: This is only available for certain server cards
197 */
198
199static ssize_t amdgpu_device_get_product_number(struct device *dev,
200 struct device_attribute *attr, char *buf)
201{
202 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 203 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 204
36000c7a 205 return sysfs_emit(buf, "%s\n", adev->product_number);
bd607166
KR
206}
207
208static DEVICE_ATTR(product_number, S_IRUGO,
209 amdgpu_device_get_product_number, NULL);
210
211/**
212 * DOC: serial_number
213 *
214 * The amdgpu driver provides a sysfs API for reporting the serial number
215 * for the device
216 * The file serial_number is used for this and returns the serial number
217 * as returned from the FRU.
218 * NOTE: This is only available for certain server cards
219 */
220
221static ssize_t amdgpu_device_get_serial_number(struct device *dev,
222 struct device_attribute *attr, char *buf)
223{
224 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 225 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 226
36000c7a 227 return sysfs_emit(buf, "%s\n", adev->serial);
bd607166
KR
228}
229
230static DEVICE_ATTR(serial_number, S_IRUGO,
231 amdgpu_device_get_serial_number, NULL);
232
fd496ca8 233/**
b98c6299 234 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
235 *
236 * @dev: drm_device pointer
237 *
b98c6299 238 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
239 * otherwise return false.
240 */
b98c6299 241bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
242{
243 struct amdgpu_device *adev = drm_to_adev(dev);
244
b98c6299 245 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
246 return true;
247 return false;
248}
249
e3ecdffa 250/**
0330b848 251 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
252 *
253 * @dev: drm_device pointer
254 *
b98c6299 255 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
256 * otherwise return false.
257 */
31af062a 258bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 259{
1348969a 260 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 261
b98c6299
AD
262 if (adev->has_pr3 ||
263 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
264 return true;
265 return false;
266}
267
a69cba42
AD
268/**
269 * amdgpu_device_supports_baco - Does the device support BACO
270 *
271 * @dev: drm_device pointer
272 *
273 * Returns true if the device supporte BACO,
274 * otherwise return false.
275 */
276bool amdgpu_device_supports_baco(struct drm_device *dev)
277{
1348969a 278 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
279
280 return amdgpu_asic_supports_baco(adev);
281}
282
3fa8f89d
S
283/**
284 * amdgpu_device_supports_smart_shift - Is the device dGPU with
285 * smart shift support
286 *
287 * @dev: drm_device pointer
288 *
289 * Returns true if the device is a dGPU with Smart Shift support,
290 * otherwise returns false.
291 */
292bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
293{
294 return (amdgpu_device_supports_boco(dev) &&
295 amdgpu_acpi_is_power_shift_control_supported());
296}
297
6e3cd2a9
MCC
298/*
299 * VRAM access helper functions
300 */
301
e35e2b11 302/**
048af66b 303 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
304 *
305 * @adev: amdgpu_device pointer
306 * @pos: offset of the buffer in vram
307 * @buf: virtual address of the buffer in system memory
308 * @size: read/write size, sizeof(@buf) must > @size
309 * @write: true - write to vram, otherwise - read from vram
310 */
048af66b
KW
311void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
312 void *buf, size_t size, bool write)
e35e2b11 313{
e35e2b11 314 unsigned long flags;
048af66b
KW
315 uint32_t hi = ~0, tmp = 0;
316 uint32_t *data = buf;
ce05ac56 317 uint64_t last;
f89f8c6b 318 int idx;
ce05ac56 319
c58a863b 320 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 321 return;
9d11eb0d 322
048af66b
KW
323 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
324
325 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
326 for (last = pos + size; pos < last; pos += 4) {
327 tmp = pos >> 31;
328
329 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
330 if (tmp != hi) {
331 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
332 hi = tmp;
333 }
334 if (write)
335 WREG32_NO_KIQ(mmMM_DATA, *data++);
336 else
337 *data++ = RREG32_NO_KIQ(mmMM_DATA);
338 }
339
340 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
341 drm_dev_exit(idx);
342}
343
344/**
bbe04dec 345 * amdgpu_device_aper_access - access vram by vram aperature
048af66b
KW
346 *
347 * @adev: amdgpu_device pointer
348 * @pos: offset of the buffer in vram
349 * @buf: virtual address of the buffer in system memory
350 * @size: read/write size, sizeof(@buf) must > @size
351 * @write: true - write to vram, otherwise - read from vram
352 *
353 * The return value means how many bytes have been transferred.
354 */
355size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
356 void *buf, size_t size, bool write)
357{
9d11eb0d 358#ifdef CONFIG_64BIT
048af66b
KW
359 void __iomem *addr;
360 size_t count = 0;
361 uint64_t last;
362
363 if (!adev->mman.aper_base_kaddr)
364 return 0;
365
9d11eb0d
CK
366 last = min(pos + size, adev->gmc.visible_vram_size);
367 if (last > pos) {
048af66b
KW
368 addr = adev->mman.aper_base_kaddr + pos;
369 count = last - pos;
9d11eb0d
CK
370
371 if (write) {
372 memcpy_toio(addr, buf, count);
373 mb();
810085dd 374 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 375 } else {
810085dd 376 amdgpu_device_invalidate_hdp(adev, NULL);
9d11eb0d
CK
377 mb();
378 memcpy_fromio(buf, addr, count);
379 }
380
9d11eb0d 381 }
048af66b
KW
382
383 return count;
384#else
385 return 0;
9d11eb0d 386#endif
048af66b 387}
9d11eb0d 388
048af66b
KW
389/**
390 * amdgpu_device_vram_access - read/write a buffer in vram
391 *
392 * @adev: amdgpu_device pointer
393 * @pos: offset of the buffer in vram
394 * @buf: virtual address of the buffer in system memory
395 * @size: read/write size, sizeof(@buf) must > @size
396 * @write: true - write to vram, otherwise - read from vram
397 */
398void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
399 void *buf, size_t size, bool write)
400{
401 size_t count;
e35e2b11 402
048af66b
KW
403 /* try to using vram apreature to access vram first */
404 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
405 size -= count;
406 if (size) {
407 /* using MM to access rest vram */
408 pos += count;
409 buf += count;
410 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
411 }
412}
413
d38ceaf9 414/*
f7ee1874 415 * register access helper functions.
d38ceaf9 416 */
56b53c0b
DL
417
418/* Check if hw access should be skipped because of hotplug or device error */
419bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
420{
7afefb81 421 if (adev->no_hw_access)
56b53c0b
DL
422 return true;
423
424#ifdef CONFIG_LOCKDEP
425 /*
426 * This is a bit complicated to understand, so worth a comment. What we assert
427 * here is that the GPU reset is not running on another thread in parallel.
428 *
429 * For this we trylock the read side of the reset semaphore, if that succeeds
430 * we know that the reset is not running in paralell.
431 *
432 * If the trylock fails we assert that we are either already holding the read
433 * side of the lock or are the reset thread itself and hold the write side of
434 * the lock.
435 */
436 if (in_task()) {
d0fb18b5
AG
437 if (down_read_trylock(&adev->reset_domain->sem))
438 up_read(&adev->reset_domain->sem);
56b53c0b 439 else
d0fb18b5 440 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
441 }
442#endif
443 return false;
444}
445
e3ecdffa 446/**
f7ee1874 447 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
448 *
449 * @adev: amdgpu_device pointer
450 * @reg: dword aligned register offset
451 * @acc_flags: access flags which require special behavior
452 *
453 * Returns the 32 bit value from the offset specified.
454 */
f7ee1874
HZ
455uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
456 uint32_t reg, uint32_t acc_flags)
d38ceaf9 457{
f4b373f4
TSD
458 uint32_t ret;
459
56b53c0b 460 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
461 return 0;
462
f7ee1874
HZ
463 if ((reg * 4) < adev->rmmio_size) {
464 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
465 amdgpu_sriov_runtime(adev) &&
d0fb18b5 466 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 467 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 468 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
469 } else {
470 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
471 }
472 } else {
473 ret = adev->pcie_rreg(adev, reg * 4);
81202807 474 }
bc992ba5 475
f7ee1874 476 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 477
f4b373f4 478 return ret;
d38ceaf9
AD
479}
480
421a2a30
ML
481/*
482 * MMIO register read with bytes helper functions
483 * @offset:bytes offset from MMIO start
484 *
485*/
486
e3ecdffa
AD
487/**
488 * amdgpu_mm_rreg8 - read a memory mapped IO register
489 *
490 * @adev: amdgpu_device pointer
491 * @offset: byte aligned register offset
492 *
493 * Returns the 8 bit value from the offset specified.
494 */
7cbbc745
AG
495uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
496{
56b53c0b 497 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
498 return 0;
499
421a2a30
ML
500 if (offset < adev->rmmio_size)
501 return (readb(adev->rmmio + offset));
502 BUG();
503}
504
505/*
506 * MMIO register write with bytes helper functions
507 * @offset:bytes offset from MMIO start
508 * @value: the value want to be written to the register
509 *
510*/
e3ecdffa
AD
511/**
512 * amdgpu_mm_wreg8 - read a memory mapped IO register
513 *
514 * @adev: amdgpu_device pointer
515 * @offset: byte aligned register offset
516 * @value: 8 bit value to write
517 *
518 * Writes the value specified to the offset specified.
519 */
7cbbc745
AG
520void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
521{
56b53c0b 522 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
523 return;
524
421a2a30
ML
525 if (offset < adev->rmmio_size)
526 writeb(value, adev->rmmio + offset);
527 else
528 BUG();
529}
530
e3ecdffa 531/**
f7ee1874 532 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
533 *
534 * @adev: amdgpu_device pointer
535 * @reg: dword aligned register offset
536 * @v: 32 bit value to write to the register
537 * @acc_flags: access flags which require special behavior
538 *
539 * Writes the value specified to the offset specified.
540 */
f7ee1874
HZ
541void amdgpu_device_wreg(struct amdgpu_device *adev,
542 uint32_t reg, uint32_t v,
543 uint32_t acc_flags)
d38ceaf9 544{
56b53c0b 545 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
546 return;
547
f7ee1874
HZ
548 if ((reg * 4) < adev->rmmio_size) {
549 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
550 amdgpu_sriov_runtime(adev) &&
d0fb18b5 551 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 552 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 553 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
554 } else {
555 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
556 }
557 } else {
558 adev->pcie_wreg(adev, reg * 4, v);
81202807 559 }
bc992ba5 560
f7ee1874 561 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 562}
d38ceaf9 563
03f2abb0 564/**
4cc9f86f 565 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 566 *
71579346
RB
567 * @adev: amdgpu_device pointer
568 * @reg: mmio/rlc register
569 * @v: value to write
570 *
571 * this function is invoked only for the debugfs register access
03f2abb0 572 */
f7ee1874
HZ
573void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
574 uint32_t reg, uint32_t v)
2e0cc4d4 575{
56b53c0b 576 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
577 return;
578
2e0cc4d4 579 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
580 adev->gfx.rlc.funcs &&
581 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 582 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
1b2dc99e 583 return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
4cc9f86f
TSD
584 } else if ((reg * 4) >= adev->rmmio_size) {
585 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
586 } else {
587 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 588 }
d38ceaf9
AD
589}
590
d38ceaf9
AD
591/**
592 * amdgpu_mm_rdoorbell - read a doorbell dword
593 *
594 * @adev: amdgpu_device pointer
595 * @index: doorbell index
596 *
597 * Returns the value in the doorbell aperture at the
598 * requested doorbell index (CIK).
599 */
600u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
601{
56b53c0b 602 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
603 return 0;
604
0512e9ff 605 if (index < adev->doorbell.num_kernel_doorbells) {
d38ceaf9
AD
606 return readl(adev->doorbell.ptr + index);
607 } else {
608 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
609 return 0;
610 }
611}
612
613/**
614 * amdgpu_mm_wdoorbell - write a doorbell dword
615 *
616 * @adev: amdgpu_device pointer
617 * @index: doorbell index
618 * @v: value to write
619 *
620 * Writes @v to the doorbell aperture at the
621 * requested doorbell index (CIK).
622 */
623void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
624{
56b53c0b 625 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
626 return;
627
0512e9ff 628 if (index < adev->doorbell.num_kernel_doorbells) {
d38ceaf9
AD
629 writel(v, adev->doorbell.ptr + index);
630 } else {
631 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
632 }
633}
634
832be404
KW
635/**
636 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
637 *
638 * @adev: amdgpu_device pointer
639 * @index: doorbell index
640 *
641 * Returns the value in the doorbell aperture at the
642 * requested doorbell index (VEGA10+).
643 */
644u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
645{
56b53c0b 646 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
647 return 0;
648
0512e9ff 649 if (index < adev->doorbell.num_kernel_doorbells) {
832be404
KW
650 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
651 } else {
652 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
653 return 0;
654 }
655}
656
657/**
658 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
659 *
660 * @adev: amdgpu_device pointer
661 * @index: doorbell index
662 * @v: value to write
663 *
664 * Writes @v to the doorbell aperture at the
665 * requested doorbell index (VEGA10+).
666 */
667void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
668{
56b53c0b 669 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
670 return;
671
0512e9ff 672 if (index < adev->doorbell.num_kernel_doorbells) {
832be404
KW
673 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
674 } else {
675 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
676 }
677}
678
1bba3683
HZ
679/**
680 * amdgpu_device_indirect_rreg - read an indirect register
681 *
682 * @adev: amdgpu_device pointer
22f453fb 683 * @reg_addr: indirect register address to read from
1bba3683
HZ
684 *
685 * Returns the value of indirect register @reg_addr
686 */
687u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
688 u32 reg_addr)
689{
65ba96e9 690 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
691 void __iomem *pcie_index_offset;
692 void __iomem *pcie_data_offset;
65ba96e9
HZ
693 u32 r;
694
695 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
696 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
697
698 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
699 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
700 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
701
702 writel(reg_addr, pcie_index_offset);
703 readl(pcie_index_offset);
704 r = readl(pcie_data_offset);
705 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
706
707 return r;
708}
709
0c552ed3
LM
710u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
711 u64 reg_addr)
712{
713 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
714 u32 r;
715 void __iomem *pcie_index_offset;
716 void __iomem *pcie_index_hi_offset;
717 void __iomem *pcie_data_offset;
718
719 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
720 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
721 if (adev->nbio.funcs->get_pcie_index_hi_offset)
722 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
723 else
724 pcie_index_hi = 0;
725
726 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
727 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
728 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
729 if (pcie_index_hi != 0)
730 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
731 pcie_index_hi * 4;
732
733 writel(reg_addr, pcie_index_offset);
734 readl(pcie_index_offset);
735 if (pcie_index_hi != 0) {
736 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
737 readl(pcie_index_hi_offset);
738 }
739 r = readl(pcie_data_offset);
740
741 /* clear the high bits */
742 if (pcie_index_hi != 0) {
743 writel(0, pcie_index_hi_offset);
744 readl(pcie_index_hi_offset);
745 }
746
747 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
748
749 return r;
750}
751
1bba3683
HZ
752/**
753 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
754 *
755 * @adev: amdgpu_device pointer
22f453fb 756 * @reg_addr: indirect register address to read from
1bba3683
HZ
757 *
758 * Returns the value of indirect register @reg_addr
759 */
760u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
761 u32 reg_addr)
762{
65ba96e9 763 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
764 void __iomem *pcie_index_offset;
765 void __iomem *pcie_data_offset;
65ba96e9
HZ
766 u64 r;
767
768 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
769 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
770
771 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
772 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
773 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
774
775 /* read low 32 bits */
776 writel(reg_addr, pcie_index_offset);
777 readl(pcie_index_offset);
778 r = readl(pcie_data_offset);
779 /* read high 32 bits */
780 writel(reg_addr + 4, pcie_index_offset);
781 readl(pcie_index_offset);
782 r |= ((u64)readl(pcie_data_offset) << 32);
783 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
784
785 return r;
786}
787
788/**
789 * amdgpu_device_indirect_wreg - write an indirect register address
790 *
791 * @adev: amdgpu_device pointer
792 * @pcie_index: mmio register offset
793 * @pcie_data: mmio register offset
794 * @reg_addr: indirect register offset
795 * @reg_data: indirect register data
796 *
797 */
798void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
799 u32 reg_addr, u32 reg_data)
800{
65ba96e9 801 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
802 void __iomem *pcie_index_offset;
803 void __iomem *pcie_data_offset;
804
65ba96e9
HZ
805 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
806 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
807
1bba3683
HZ
808 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
809 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
810 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
811
812 writel(reg_addr, pcie_index_offset);
813 readl(pcie_index_offset);
814 writel(reg_data, pcie_data_offset);
815 readl(pcie_data_offset);
816 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
817}
818
0c552ed3
LM
819void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
820 u64 reg_addr, u32 reg_data)
821{
822 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
823 void __iomem *pcie_index_offset;
824 void __iomem *pcie_index_hi_offset;
825 void __iomem *pcie_data_offset;
826
827 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
828 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
829 if (adev->nbio.funcs->get_pcie_index_hi_offset)
830 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
831 else
832 pcie_index_hi = 0;
833
834 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
835 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
836 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
837 if (pcie_index_hi != 0)
838 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
839 pcie_index_hi * 4;
840
841 writel(reg_addr, pcie_index_offset);
842 readl(pcie_index_offset);
843 if (pcie_index_hi != 0) {
844 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
845 readl(pcie_index_hi_offset);
846 }
847 writel(reg_data, pcie_data_offset);
848 readl(pcie_data_offset);
849
850 /* clear the high bits */
851 if (pcie_index_hi != 0) {
852 writel(0, pcie_index_hi_offset);
853 readl(pcie_index_hi_offset);
854 }
855
856 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
857}
858
1bba3683
HZ
859/**
860 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
861 *
862 * @adev: amdgpu_device pointer
863 * @pcie_index: mmio register offset
864 * @pcie_data: mmio register offset
865 * @reg_addr: indirect register offset
866 * @reg_data: indirect register data
867 *
868 */
869void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
870 u32 reg_addr, u64 reg_data)
871{
65ba96e9 872 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
873 void __iomem *pcie_index_offset;
874 void __iomem *pcie_data_offset;
875
65ba96e9
HZ
876 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
877 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
878
1bba3683
HZ
879 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
880 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
881 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
882
883 /* write low 32 bits */
884 writel(reg_addr, pcie_index_offset);
885 readl(pcie_index_offset);
886 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
887 readl(pcie_data_offset);
888 /* write high 32 bits */
889 writel(reg_addr + 4, pcie_index_offset);
890 readl(pcie_index_offset);
891 writel((u32)(reg_data >> 32), pcie_data_offset);
892 readl(pcie_data_offset);
893 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
894}
895
dabc114e
HZ
896/**
897 * amdgpu_device_get_rev_id - query device rev_id
898 *
899 * @adev: amdgpu_device pointer
900 *
901 * Return device rev_id
902 */
903u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
904{
905 return adev->nbio.funcs->get_rev_id(adev);
906}
907
d38ceaf9
AD
908/**
909 * amdgpu_invalid_rreg - dummy reg read function
910 *
982a820b 911 * @adev: amdgpu_device pointer
d38ceaf9
AD
912 * @reg: offset of register
913 *
914 * Dummy register read function. Used for register blocks
915 * that certain asics don't have (all asics).
916 * Returns the value in the register.
917 */
918static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
919{
920 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
921 BUG();
922 return 0;
923}
924
0c552ed3
LM
925static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
926{
927 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
928 BUG();
929 return 0;
930}
931
d38ceaf9
AD
932/**
933 * amdgpu_invalid_wreg - dummy reg write function
934 *
982a820b 935 * @adev: amdgpu_device pointer
d38ceaf9
AD
936 * @reg: offset of register
937 * @v: value to write to the register
938 *
939 * Dummy register read function. Used for register blocks
940 * that certain asics don't have (all asics).
941 */
942static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
943{
944 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
945 reg, v);
946 BUG();
947}
948
0c552ed3
LM
949static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
950{
951 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
952 reg, v);
953 BUG();
954}
955
4fa1c6a6
TZ
956/**
957 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
958 *
982a820b 959 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
960 * @reg: offset of register
961 *
962 * Dummy register read function. Used for register blocks
963 * that certain asics don't have (all asics).
964 * Returns the value in the register.
965 */
966static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
967{
968 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
969 BUG();
970 return 0;
971}
972
973/**
974 * amdgpu_invalid_wreg64 - dummy reg write function
975 *
982a820b 976 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
977 * @reg: offset of register
978 * @v: value to write to the register
979 *
980 * Dummy register read function. Used for register blocks
981 * that certain asics don't have (all asics).
982 */
983static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
984{
985 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
986 reg, v);
987 BUG();
988}
989
d38ceaf9
AD
990/**
991 * amdgpu_block_invalid_rreg - dummy reg read function
992 *
982a820b 993 * @adev: amdgpu_device pointer
d38ceaf9
AD
994 * @block: offset of instance
995 * @reg: offset of register
996 *
997 * Dummy register read function. Used for register blocks
998 * that certain asics don't have (all asics).
999 * Returns the value in the register.
1000 */
1001static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1002 uint32_t block, uint32_t reg)
1003{
1004 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1005 reg, block);
1006 BUG();
1007 return 0;
1008}
1009
1010/**
1011 * amdgpu_block_invalid_wreg - dummy reg write function
1012 *
982a820b 1013 * @adev: amdgpu_device pointer
d38ceaf9
AD
1014 * @block: offset of instance
1015 * @reg: offset of register
1016 * @v: value to write to the register
1017 *
1018 * Dummy register read function. Used for register blocks
1019 * that certain asics don't have (all asics).
1020 */
1021static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1022 uint32_t block,
1023 uint32_t reg, uint32_t v)
1024{
1025 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1026 reg, block, v);
1027 BUG();
1028}
1029
4d2997ab
AD
1030/**
1031 * amdgpu_device_asic_init - Wrapper for atom asic_init
1032 *
982a820b 1033 * @adev: amdgpu_device pointer
4d2997ab
AD
1034 *
1035 * Does any asic specific work and then calls atom asic init.
1036 */
1037static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1038{
1039 amdgpu_asic_pre_asic_init(adev);
1040
5db392a0
LL
1041 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
1042 adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
85d1bcc6
HZ
1043 return amdgpu_atomfirmware_asic_init(adev, true);
1044 else
1045 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
4d2997ab
AD
1046}
1047
e3ecdffa 1048/**
7ccfd79f 1049 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 1050 *
982a820b 1051 * @adev: amdgpu_device pointer
e3ecdffa
AD
1052 *
1053 * Allocates a scratch page of VRAM for use by various things in the
1054 * driver.
1055 */
7ccfd79f 1056static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 1057{
7ccfd79f
CK
1058 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1059 AMDGPU_GEM_DOMAIN_VRAM |
1060 AMDGPU_GEM_DOMAIN_GTT,
1061 &adev->mem_scratch.robj,
1062 &adev->mem_scratch.gpu_addr,
1063 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
1064}
1065
e3ecdffa 1066/**
7ccfd79f 1067 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 1068 *
982a820b 1069 * @adev: amdgpu_device pointer
e3ecdffa
AD
1070 *
1071 * Frees the VRAM scratch page.
1072 */
7ccfd79f 1073static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 1074{
7ccfd79f 1075 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
1076}
1077
1078/**
9c3f2b54 1079 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
1080 *
1081 * @adev: amdgpu_device pointer
1082 * @registers: pointer to the register array
1083 * @array_size: size of the register array
1084 *
1085 * Programs an array or registers with and and or masks.
1086 * This is a helper for setting golden registers.
1087 */
9c3f2b54
AD
1088void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1089 const u32 *registers,
1090 const u32 array_size)
d38ceaf9
AD
1091{
1092 u32 tmp, reg, and_mask, or_mask;
1093 int i;
1094
1095 if (array_size % 3)
1096 return;
1097
47fc644f 1098 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1099 reg = registers[i + 0];
1100 and_mask = registers[i + 1];
1101 or_mask = registers[i + 2];
1102
1103 if (and_mask == 0xffffffff) {
1104 tmp = or_mask;
1105 } else {
1106 tmp = RREG32(reg);
1107 tmp &= ~and_mask;
e0d07657
HZ
1108 if (adev->family >= AMDGPU_FAMILY_AI)
1109 tmp |= (or_mask & and_mask);
1110 else
1111 tmp |= or_mask;
d38ceaf9
AD
1112 }
1113 WREG32(reg, tmp);
1114 }
1115}
1116
e3ecdffa
AD
1117/**
1118 * amdgpu_device_pci_config_reset - reset the GPU
1119 *
1120 * @adev: amdgpu_device pointer
1121 *
1122 * Resets the GPU using the pci config reset sequence.
1123 * Only applicable to asics prior to vega10.
1124 */
8111c387 1125void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1126{
1127 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1128}
1129
af484df8
AD
1130/**
1131 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1132 *
1133 * @adev: amdgpu_device pointer
1134 *
1135 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1136 */
1137int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1138{
1139 return pci_reset_function(adev->pdev);
1140}
1141
d38ceaf9
AD
1142/*
1143 * GPU doorbell aperture helpers function.
1144 */
1145/**
06ec9070 1146 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
1147 *
1148 * @adev: amdgpu_device pointer
1149 *
1150 * Init doorbell driver information (CIK)
1151 * Returns 0 on success, error on failure.
1152 */
06ec9070 1153static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 1154{
6585661d 1155
705e519e
CK
1156 /* No doorbell on SI hardware generation */
1157 if (adev->asic_type < CHIP_BONAIRE) {
1158 adev->doorbell.base = 0;
1159 adev->doorbell.size = 0;
0512e9ff 1160 adev->doorbell.num_kernel_doorbells = 0;
705e519e
CK
1161 adev->doorbell.ptr = NULL;
1162 return 0;
1163 }
1164
d6895ad3
CK
1165 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1166 return -EINVAL;
1167
22357775
AD
1168 amdgpu_asic_init_doorbell_index(adev);
1169
d38ceaf9
AD
1170 /* doorbell bar mapping */
1171 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1172 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1173
de33a329 1174 if (adev->enable_mes) {
0512e9ff 1175 adev->doorbell.num_kernel_doorbells =
de33a329
JX
1176 adev->doorbell.size / sizeof(u32);
1177 } else {
0512e9ff 1178 adev->doorbell.num_kernel_doorbells =
de33a329
JX
1179 min_t(u32, adev->doorbell.size / sizeof(u32),
1180 adev->doorbell_index.max_assignment+1);
0512e9ff 1181 if (adev->doorbell.num_kernel_doorbells == 0)
de33a329
JX
1182 return -EINVAL;
1183
1184 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
1185 * paging queue doorbell use the second page. The
1186 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
1187 * doorbells are in the first page. So with paging queue enabled,
0512e9ff 1188 * the max num_kernel_doorbells should + 1 page (0x400 in dword)
de33a329 1189 */
0ee20b86
LM
1190 if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(4, 0, 0) &&
1191 adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(4, 2, 0))
0512e9ff 1192 adev->doorbell.num_kernel_doorbells += 0x400;
de33a329 1193 }
ec3db8a6 1194
8972e5d2 1195 adev->doorbell.ptr = ioremap(adev->doorbell.base,
0512e9ff 1196 adev->doorbell.num_kernel_doorbells *
8972e5d2
CK
1197 sizeof(u32));
1198 if (adev->doorbell.ptr == NULL)
d38ceaf9 1199 return -ENOMEM;
d38ceaf9
AD
1200
1201 return 0;
1202}
1203
1204/**
06ec9070 1205 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
1206 *
1207 * @adev: amdgpu_device pointer
1208 *
1209 * Tear down doorbell driver information (CIK)
1210 */
06ec9070 1211static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1212{
1213 iounmap(adev->doorbell.ptr);
1214 adev->doorbell.ptr = NULL;
1215}
1216
22cb0164 1217
d38ceaf9
AD
1218
1219/*
06ec9070 1220 * amdgpu_device_wb_*()
455a7bc2 1221 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1222 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
1223 */
1224
1225/**
06ec9070 1226 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1227 *
1228 * @adev: amdgpu_device pointer
1229 *
1230 * Disables Writeback and frees the Writeback memory (all asics).
1231 * Used at driver shutdown.
1232 */
06ec9070 1233static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1234{
1235 if (adev->wb.wb_obj) {
a76ed485
AD
1236 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1237 &adev->wb.gpu_addr,
1238 (void **)&adev->wb.wb);
d38ceaf9
AD
1239 adev->wb.wb_obj = NULL;
1240 }
1241}
1242
1243/**
03f2abb0 1244 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1245 *
1246 * @adev: amdgpu_device pointer
1247 *
455a7bc2 1248 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1249 * Used at driver startup.
1250 * Returns 0 on success or an -error on failure.
1251 */
06ec9070 1252static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1253{
1254 int r;
1255
1256 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1257 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1258 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1259 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1260 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1261 (void **)&adev->wb.wb);
d38ceaf9
AD
1262 if (r) {
1263 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1264 return r;
1265 }
d38ceaf9
AD
1266
1267 adev->wb.num_wb = AMDGPU_MAX_WB;
1268 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1269
1270 /* clear wb memory */
73469585 1271 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1272 }
1273
1274 return 0;
1275}
1276
1277/**
131b4b36 1278 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1279 *
1280 * @adev: amdgpu_device pointer
1281 * @wb: wb index
1282 *
1283 * Allocate a wb slot for use by the driver (all asics).
1284 * Returns 0 on success or -EINVAL on failure.
1285 */
131b4b36 1286int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1287{
1288 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1289
97407b63 1290 if (offset < adev->wb.num_wb) {
7014285a 1291 __set_bit(offset, adev->wb.used);
63ae07ca 1292 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1293 return 0;
1294 } else {
1295 return -EINVAL;
1296 }
1297}
1298
d38ceaf9 1299/**
131b4b36 1300 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1301 *
1302 * @adev: amdgpu_device pointer
1303 * @wb: wb index
1304 *
1305 * Free a wb slot allocated for use by the driver (all asics)
1306 */
131b4b36 1307void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1308{
73469585 1309 wb >>= 3;
d38ceaf9 1310 if (wb < adev->wb.num_wb)
73469585 1311 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1312}
1313
d6895ad3
CK
1314/**
1315 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1316 *
1317 * @adev: amdgpu_device pointer
1318 *
1319 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1320 * to fail, but if any of the BARs is not accessible after the size we abort
1321 * driver loading by returning -ENODEV.
1322 */
1323int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1324{
453f617a 1325 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1326 struct pci_bus *root;
1327 struct resource *res;
1328 unsigned i;
d6895ad3
CK
1329 u16 cmd;
1330 int r;
1331
0c03b912 1332 /* Bypass for VF */
1333 if (amdgpu_sriov_vf(adev))
1334 return 0;
1335
b7221f2b
AD
1336 /* skip if the bios has already enabled large BAR */
1337 if (adev->gmc.real_vram_size &&
1338 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1339 return 0;
1340
31b8adab
CK
1341 /* Check if the root BUS has 64bit memory resources */
1342 root = adev->pdev->bus;
1343 while (root->parent)
1344 root = root->parent;
1345
1346 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1347 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1348 res->start > 0x100000000ull)
1349 break;
1350 }
1351
1352 /* Trying to resize is pointless without a root hub window above 4GB */
1353 if (!res)
1354 return 0;
1355
453f617a
ND
1356 /* Limit the BAR size to what is available */
1357 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1358 rbar_size);
1359
d6895ad3
CK
1360 /* Disable memory decoding while we change the BAR addresses and size */
1361 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1362 pci_write_config_word(adev->pdev, PCI_COMMAND,
1363 cmd & ~PCI_COMMAND_MEMORY);
1364
1365 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 1366 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
1367 if (adev->asic_type >= CHIP_BONAIRE)
1368 pci_release_resource(adev->pdev, 2);
1369
1370 pci_release_resource(adev->pdev, 0);
1371
1372 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1373 if (r == -ENOSPC)
1374 DRM_INFO("Not enough PCI address space for a large BAR.");
1375 else if (r && r != -ENOTSUPP)
1376 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1377
1378 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1379
1380 /* When the doorbell or fb BAR isn't available we have no chance of
1381 * using the device.
1382 */
06ec9070 1383 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
1384 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1385 return -ENODEV;
1386
1387 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1388
1389 return 0;
1390}
a05502e5 1391
d38ceaf9
AD
1392/*
1393 * GPU helpers function.
1394 */
1395/**
39c640c0 1396 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1397 *
1398 * @adev: amdgpu_device pointer
1399 *
c836fec5
JQ
1400 * Check if the asic has been initialized (all asics) at driver startup
1401 * or post is needed if hw reset is performed.
1402 * Returns true if need or false if not.
d38ceaf9 1403 */
39c640c0 1404bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1405{
1406 uint32_t reg;
1407
bec86378
ML
1408 if (amdgpu_sriov_vf(adev))
1409 return false;
1410
1411 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1412 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1413 * some old smc fw still need driver do vPost otherwise gpu hang, while
1414 * those smc fw version above 22.15 doesn't have this flaw, so we force
1415 * vpost executed for smc version below 22.15
bec86378
ML
1416 */
1417 if (adev->asic_type == CHIP_FIJI) {
1418 int err;
1419 uint32_t fw_ver;
1420 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1421 /* force vPost if error occured */
1422 if (err)
1423 return true;
1424
1425 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1426 if (fw_ver < 0x00160e00)
1427 return true;
bec86378 1428 }
bec86378 1429 }
91fe77eb 1430
e3c1b071 1431 /* Don't post if we need to reset whole hive on init */
1432 if (adev->gmc.xgmi.pending_reset)
1433 return false;
1434
91fe77eb 1435 if (adev->has_hw_reset) {
1436 adev->has_hw_reset = false;
1437 return true;
1438 }
1439
1440 /* bios scratch used on CIK+ */
1441 if (adev->asic_type >= CHIP_BONAIRE)
1442 return amdgpu_atombios_scratch_need_asic_init(adev);
1443
1444 /* check MEM_SIZE for older asics */
1445 reg = amdgpu_asic_get_config_memsize(adev);
1446
1447 if ((reg != 0) && (reg != 0xffffffff))
1448 return false;
1449
1450 return true;
bec86378
ML
1451}
1452
0ab5d711
ML
1453/**
1454 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1455 *
1456 * @adev: amdgpu_device pointer
1457 *
1458 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1459 * be set for this device.
1460 *
1461 * Returns true if it should be used or false if not.
1462 */
1463bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1464{
1465 switch (amdgpu_aspm) {
1466 case -1:
1467 break;
1468 case 0:
1469 return false;
1470 case 1:
1471 return true;
1472 default:
1473 return false;
1474 }
1475 return pcie_aspm_enabled(adev->pdev);
1476}
1477
3ad5dcfe
KHF
1478bool amdgpu_device_aspm_support_quirk(void)
1479{
1480#if IS_ENABLED(CONFIG_X86)
1481 struct cpuinfo_x86 *c = &cpu_data(0);
1482
1483 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1484#else
1485 return true;
1486#endif
1487}
1488
d38ceaf9
AD
1489/* if we get transitioned to only one device, take VGA back */
1490/**
06ec9070 1491 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1492 *
bf44e8ce 1493 * @pdev: PCI device pointer
d38ceaf9
AD
1494 * @state: enable/disable vga decode
1495 *
1496 * Enable/disable vga decode (all asics).
1497 * Returns VGA resource flags.
1498 */
bf44e8ce
CH
1499static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1500 bool state)
d38ceaf9 1501{
bf44e8ce 1502 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
d38ceaf9
AD
1503 amdgpu_asic_set_vga_state(adev, state);
1504 if (state)
1505 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1506 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1507 else
1508 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1509}
1510
e3ecdffa
AD
1511/**
1512 * amdgpu_device_check_block_size - validate the vm block size
1513 *
1514 * @adev: amdgpu_device pointer
1515 *
1516 * Validates the vm block size specified via module parameter.
1517 * The vm block size defines number of bits in page table versus page directory,
1518 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1519 * page table and the remaining bits are in the page directory.
1520 */
06ec9070 1521static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1522{
1523 /* defines number of bits in page table versus page directory,
1524 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1525 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1526 if (amdgpu_vm_block_size == -1)
1527 return;
a1adf8be 1528
bab4fee7 1529 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1530 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1531 amdgpu_vm_block_size);
97489129 1532 amdgpu_vm_block_size = -1;
a1adf8be 1533 }
a1adf8be
CZ
1534}
1535
e3ecdffa
AD
1536/**
1537 * amdgpu_device_check_vm_size - validate the vm size
1538 *
1539 * @adev: amdgpu_device pointer
1540 *
1541 * Validates the vm size in GB specified via module parameter.
1542 * The VM size is the size of the GPU virtual memory space in GB.
1543 */
06ec9070 1544static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1545{
64dab074
AD
1546 /* no need to check the default value */
1547 if (amdgpu_vm_size == -1)
1548 return;
1549
83ca145d
ZJ
1550 if (amdgpu_vm_size < 1) {
1551 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1552 amdgpu_vm_size);
f3368128 1553 amdgpu_vm_size = -1;
83ca145d 1554 }
83ca145d
ZJ
1555}
1556
7951e376
RZ
1557static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1558{
1559 struct sysinfo si;
a9d4fe2f 1560 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1561 uint64_t total_memory;
1562 uint64_t dram_size_seven_GB = 0x1B8000000;
1563 uint64_t dram_size_three_GB = 0xB8000000;
1564
1565 if (amdgpu_smu_memory_pool_size == 0)
1566 return;
1567
1568 if (!is_os_64) {
1569 DRM_WARN("Not 64-bit OS, feature not supported\n");
1570 goto def_value;
1571 }
1572 si_meminfo(&si);
1573 total_memory = (uint64_t)si.totalram * si.mem_unit;
1574
1575 if ((amdgpu_smu_memory_pool_size == 1) ||
1576 (amdgpu_smu_memory_pool_size == 2)) {
1577 if (total_memory < dram_size_three_GB)
1578 goto def_value1;
1579 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1580 (amdgpu_smu_memory_pool_size == 8)) {
1581 if (total_memory < dram_size_seven_GB)
1582 goto def_value1;
1583 } else {
1584 DRM_WARN("Smu memory pool size not supported\n");
1585 goto def_value;
1586 }
1587 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1588
1589 return;
1590
1591def_value1:
1592 DRM_WARN("No enough system memory\n");
1593def_value:
1594 adev->pm.smu_prv_buffer_size = 0;
1595}
1596
9f6a7857
HR
1597static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1598{
1599 if (!(adev->flags & AMD_IS_APU) ||
1600 adev->asic_type < CHIP_RAVEN)
1601 return 0;
1602
1603 switch (adev->asic_type) {
1604 case CHIP_RAVEN:
1605 if (adev->pdev->device == 0x15dd)
1606 adev->apu_flags |= AMD_APU_IS_RAVEN;
1607 if (adev->pdev->device == 0x15d8)
1608 adev->apu_flags |= AMD_APU_IS_PICASSO;
1609 break;
1610 case CHIP_RENOIR:
1611 if ((adev->pdev->device == 0x1636) ||
1612 (adev->pdev->device == 0x164c))
1613 adev->apu_flags |= AMD_APU_IS_RENOIR;
1614 else
1615 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1616 break;
1617 case CHIP_VANGOGH:
1618 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1619 break;
1620 case CHIP_YELLOW_CARP:
1621 break;
d0f56dc2 1622 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1623 if ((adev->pdev->device == 0x13FE) ||
1624 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1625 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1626 break;
9f6a7857 1627 default:
4eaf21b7 1628 break;
9f6a7857
HR
1629 }
1630
1631 return 0;
1632}
1633
d38ceaf9 1634/**
06ec9070 1635 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1636 *
1637 * @adev: amdgpu_device pointer
1638 *
1639 * Validates certain module parameters and updates
1640 * the associated values used by the driver (all asics).
1641 */
912dfc84 1642static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1643{
5b011235
CZ
1644 if (amdgpu_sched_jobs < 4) {
1645 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1646 amdgpu_sched_jobs);
1647 amdgpu_sched_jobs = 4;
47fc644f 1648 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1649 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1650 amdgpu_sched_jobs);
1651 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1652 }
d38ceaf9 1653
83e74db6 1654 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1655 /* gart size must be greater or equal to 32M */
1656 dev_warn(adev->dev, "gart size (%d) too small\n",
1657 amdgpu_gart_size);
83e74db6 1658 amdgpu_gart_size = -1;
d38ceaf9
AD
1659 }
1660
36d38372 1661 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1662 /* gtt size must be greater or equal to 32M */
36d38372
CK
1663 dev_warn(adev->dev, "gtt size (%d) too small\n",
1664 amdgpu_gtt_size);
1665 amdgpu_gtt_size = -1;
d38ceaf9
AD
1666 }
1667
d07f14be
RH
1668 /* valid range is between 4 and 9 inclusive */
1669 if (amdgpu_vm_fragment_size != -1 &&
1670 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1671 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1672 amdgpu_vm_fragment_size = -1;
1673 }
1674
5d5bd5e3
KW
1675 if (amdgpu_sched_hw_submission < 2) {
1676 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1677 amdgpu_sched_hw_submission);
1678 amdgpu_sched_hw_submission = 2;
1679 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1680 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1681 amdgpu_sched_hw_submission);
1682 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1683 }
1684
2656fd23
AG
1685 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1686 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1687 amdgpu_reset_method = -1;
1688 }
1689
7951e376
RZ
1690 amdgpu_device_check_smu_prv_buffer_size(adev);
1691
06ec9070 1692 amdgpu_device_check_vm_size(adev);
d38ceaf9 1693
06ec9070 1694 amdgpu_device_check_block_size(adev);
6a7f76e7 1695
19aede77 1696 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1697
e3c00faa 1698 return 0;
d38ceaf9
AD
1699}
1700
1701/**
1702 * amdgpu_switcheroo_set_state - set switcheroo state
1703 *
1704 * @pdev: pci dev pointer
1694467b 1705 * @state: vga_switcheroo state
d38ceaf9 1706 *
12024b17 1707 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1708 * the asics before or after it is powered up using ACPI methods.
1709 */
8aba21b7
LT
1710static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1711 enum vga_switcheroo_state state)
d38ceaf9
AD
1712{
1713 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1714 int r;
d38ceaf9 1715
b98c6299 1716 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1717 return;
1718
1719 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1720 pr_info("switched on\n");
d38ceaf9
AD
1721 /* don't suspend or resume card normally */
1722 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1723
8f66090b
TZ
1724 pci_set_power_state(pdev, PCI_D0);
1725 amdgpu_device_load_pci_state(pdev);
1726 r = pci_enable_device(pdev);
de185019
AD
1727 if (r)
1728 DRM_WARN("pci_enable_device failed (%d)\n", r);
1729 amdgpu_device_resume(dev, true);
d38ceaf9 1730
d38ceaf9 1731 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1732 } else {
dd4fa6c1 1733 pr_info("switched off\n");
d38ceaf9 1734 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1735 amdgpu_device_suspend(dev, true);
8f66090b 1736 amdgpu_device_cache_pci_state(pdev);
de185019 1737 /* Shut down the device */
8f66090b
TZ
1738 pci_disable_device(pdev);
1739 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1740 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1741 }
1742}
1743
1744/**
1745 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1746 *
1747 * @pdev: pci dev pointer
1748 *
1749 * Callback for the switcheroo driver. Checks if the switcheroo
1750 * state can be changed.
1751 * Returns true if the state can be changed, false if not.
1752 */
1753static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1754{
1755 struct drm_device *dev = pci_get_drvdata(pdev);
1756
1757 /*
1758 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1759 * locking inversion with the driver load path. And the access here is
1760 * completely racy anyway. So don't bother with locking for now.
1761 */
7e13ad89 1762 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1763}
1764
1765static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1766 .set_gpu_state = amdgpu_switcheroo_set_state,
1767 .reprobe = NULL,
1768 .can_switch = amdgpu_switcheroo_can_switch,
1769};
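
/*
 * Illustrative sketch only: these ops are registered with the switcheroo
 * framework elsewhere in this file (the call site is outside this excerpt),
 * roughly as
 *
 *   vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, px);
 *
 * where px indicates whether the board uses ATPX (PX) power control.
 */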
1770
e3ecdffa
AD
1771/**
1772 * amdgpu_device_ip_set_clockgating_state - set the CG state
1773 *
87e3f136 1774 * @dev: amdgpu_device pointer
e3ecdffa
AD
1775 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1776 * @state: clockgating state (gate or ungate)
1777 *
1778 * Sets the requested clockgating state for all instances of
1779 * the hardware IP specified.
1780 * Returns the error code from the last instance.
1781 */
43fa561f 1782int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1783 enum amd_ip_block_type block_type,
1784 enum amd_clockgating_state state)
d38ceaf9 1785{
43fa561f 1786 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1787 int i, r = 0;
1788
1789 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1790 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1791 continue;
c722865a
RZ
1792 if (adev->ip_blocks[i].version->type != block_type)
1793 continue;
1794 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1795 continue;
1796 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1797 (void *)adev, state);
1798 if (r)
1799 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1800 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1801 }
1802 return r;
1803}
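
/*
 * Usage sketch (illustrative only): callers gate or ungate all instances of
 * an IP type in one call, e.g.
 *
 *   amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *                                          AMD_CG_STATE_GATE);
 *
 * amdgpu_device_ip_set_powergating_state() below follows the same pattern
 * with AMD_PG_STATE_GATE / AMD_PG_STATE_UNGATE.
 */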
1804
e3ecdffa
AD
1805/**
1806 * amdgpu_device_ip_set_powergating_state - set the PG state
1807 *
87e3f136 1808 * @dev: amdgpu_device pointer
e3ecdffa
AD
1809 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1810 * @state: powergating state (gate or ungate)
1811 *
1812 * Sets the requested powergating state for all instances of
1813 * the hardware IP specified.
1814 * Returns the error code from the last instance.
1815 */
43fa561f 1816int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1817 enum amd_ip_block_type block_type,
1818 enum amd_powergating_state state)
d38ceaf9 1819{
43fa561f 1820 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1821 int i, r = 0;
1822
1823 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1824 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1825 continue;
c722865a
RZ
1826 if (adev->ip_blocks[i].version->type != block_type)
1827 continue;
1828 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1829 continue;
1830 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1831 (void *)adev, state);
1832 if (r)
1833 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1834 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1835 }
1836 return r;
1837}
1838
e3ecdffa
AD
1839/**
1840 * amdgpu_device_ip_get_clockgating_state - get the CG state
1841 *
1842 * @adev: amdgpu_device pointer
1843 * @flags: clockgating feature flags
1844 *
1845 * Walks the list of IPs on the device and updates the clockgating
1846 * flags for each IP.
1847 * Updates @flags with the feature flags for each hardware IP where
1848 * clockgating is enabled.
1849 */
2990a1fc 1850void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1851 u64 *flags)
6cb2d4e4
HR
1852{
1853 int i;
1854
1855 for (i = 0; i < adev->num_ip_blocks; i++) {
1856 if (!adev->ip_blocks[i].status.valid)
1857 continue;
1858 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1859 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1860 }
1861}
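
/*
 * Example (illustrative): a debugfs consumer can collect the accumulated
 * clockgating feature mask like this:
 *
 *   u64 cg_flags = 0;
 *
 *   amdgpu_device_ip_get_clockgating_state(adev, &cg_flags);
 *
 * cg_flags then carries the AMD_CG_SUPPORT_* bits reported by each IP.
 */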
1862
e3ecdffa
AD
1863/**
1864 * amdgpu_device_ip_wait_for_idle - wait for idle
1865 *
1866 * @adev: amdgpu_device pointer
1867 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1868 *
1869 * Waits for the requested hardware IP to be idle.
1870 * Returns 0 for success or a negative error code on failure.
1871 */
2990a1fc
AD
1872int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1873 enum amd_ip_block_type block_type)
5dbbb60b
AD
1874{
1875 int i, r;
1876
1877 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1878 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1879 continue;
a1255107
AD
1880 if (adev->ip_blocks[i].version->type == block_type) {
1881 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1882 if (r)
1883 return r;
1884 break;
1885 }
1886 }
1887 return 0;
1888
1889}
1890
e3ecdffa
AD
1891/**
1892 * amdgpu_device_ip_is_idle - is the hardware IP idle
1893 *
1894 * @adev: amdgpu_device pointer
1895 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1896 *
1897 * Check if the hardware IP is idle or not.
1898 * Returns true if the IP is idle, false if not.
1899 */
2990a1fc
AD
1900bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1901 enum amd_ip_block_type block_type)
5dbbb60b
AD
1902{
1903 int i;
1904
1905 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1906 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1907 continue;
a1255107
AD
1908 if (adev->ip_blocks[i].version->type == block_type)
1909 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1910 }
1911 return true;
1912
1913}
1914
e3ecdffa
AD
1915/**
1916 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1917 *
1918 * @adev: amdgpu_device pointer
87e3f136 1919 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1920 *
1921 * Returns a pointer to the hardware IP block structure
1922 * if it exists for the asic, otherwise NULL.
1923 */
2990a1fc
AD
1924struct amdgpu_ip_block *
1925amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1926 enum amd_ip_block_type type)
d38ceaf9
AD
1927{
1928 int i;
1929
1930 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1931 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1932 return &adev->ip_blocks[i];
1933
1934 return NULL;
1935}
1936
1937/**
2990a1fc 1938 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1939 *
1940 * @adev: amdgpu_device pointer
5fc3aeeb 1941 * @type: enum amd_ip_block_type
d38ceaf9
AD
1942 * @major: major version
1943 * @minor: minor version
1944 *
1945 * return 0 if equal or greater
1946 * return 1 if smaller or the ip_block doesn't exist
1947 */
2990a1fc
AD
1948int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1949 enum amd_ip_block_type type,
1950 u32 major, u32 minor)
d38ceaf9 1951{
2990a1fc 1952 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1953
a1255107
AD
1954 if (ip_block && ((ip_block->version->major > major) ||
1955 ((ip_block->version->major == major) &&
1956 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1957 return 0;
1958
1959 return 1;
1960}
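
/*
 * Example (illustrative): code that depends on a minimum IP revision can
 * test it as
 *
 *   r = amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX, 8, 1);
 *
 * r == 0 means GFX 8.1 or newer is present; r == 1 means the block is older
 * or missing.
 */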
1961
a1255107 1962/**
2990a1fc 1963 * amdgpu_device_ip_block_add
a1255107
AD
1964 *
1965 * @adev: amdgpu_device pointer
1966 * @ip_block_version: pointer to the IP to add
1967 *
1968 * Adds the IP block driver information to the collection of IPs
1969 * on the asic.
1970 */
2990a1fc
AD
1971int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1972 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1973{
1974 if (!ip_block_version)
1975 return -EINVAL;
1976
7bd939d0
LG
1977 switch (ip_block_version->type) {
1978 case AMD_IP_BLOCK_TYPE_VCN:
1979 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1980 return 0;
1981 break;
1982 case AMD_IP_BLOCK_TYPE_JPEG:
1983 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1984 return 0;
1985 break;
1986 default:
1987 break;
1988 }
1989
e966a725 1990 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1991 ip_block_version->funcs->name);
1992
a1255107
AD
1993 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1994
1995 return 0;
1996}
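
/*
 * Illustrative usage, as done by the per-ASIC set_ip_blocks helpers such as
 * vi_set_ip_blocks() (the actual call sites are outside this excerpt):
 *
 *   r = amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
 *   if (r)
 *           return r;
 */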
1997
e3ecdffa
AD
1998/**
1999 * amdgpu_device_enable_virtual_display - enable virtual display feature
2000 *
2001 * @adev: amdgpu_device pointer
2002 *
2003 * Enables the virtual display feature if the user has enabled it via
2004 * the module parameter virtual_display. This feature provides virtual
2005 * display hardware on headless boards or in virtualized environments.
2006 * This function parses and validates the configuration string specified by
2007 * the user and configures the virtual display configuration (number of
2008 * virtual connectors, crtcs, etc.) specified.
2009 */
483ef985 2010static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
2011{
2012 adev->enable_virtual_display = false;
2013
2014 if (amdgpu_virtual_display) {
8f66090b 2015 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 2016 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
2017
2018 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2019 pciaddstr_tmp = pciaddstr;
0f66356d
ED
2020 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2021 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
2022 if (!strcmp("all", pciaddname)
2023 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
2024 long num_crtc;
2025 int res = -1;
2026
9accf2fd 2027 adev->enable_virtual_display = true;
0f66356d
ED
2028
2029 if (pciaddname_tmp)
2030 res = kstrtol(pciaddname_tmp, 10,
2031 &num_crtc);
2032
2033 if (!res) {
2034 if (num_crtc < 1)
2035 num_crtc = 1;
2036 if (num_crtc > 6)
2037 num_crtc = 6;
2038 adev->mode_info.num_crtc = num_crtc;
2039 } else {
2040 adev->mode_info.num_crtc = 1;
2041 }
9accf2fd
ED
2042 break;
2043 }
2044 }
2045
0f66356d
ED
2046 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2047 amdgpu_virtual_display, pci_address_name,
2048 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
2049
2050 kfree(pciaddstr);
2051 }
2052}
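
/*
 * Illustrative example of the string parsed above:
 *
 *   amdgpu.virtual_display=0000:01:00.0,2;0000:02:00.0,1
 *
 * enables two virtual CRTCs on the first device and one on the second.
 * "all" can be used in place of a PCI address, and the CRTC count is
 * clamped to the range 1..6.
 */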
2053
25263da3
AD
2054void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2055{
2056 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2057 adev->mode_info.num_crtc = 1;
2058 adev->enable_virtual_display = true;
2059 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2060 adev->enable_virtual_display, adev->mode_info.num_crtc);
2061 }
2062}
2063
e3ecdffa
AD
2064/**
2065 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2066 *
2067 * @adev: amdgpu_device pointer
2068 *
2069 * Parses the asic configuration parameters specified in the gpu info
2070 * firmware and makes them available to the driver for use in configuring
2071 * the asic.
2072 * Returns 0 on success, -EINVAL on failure.
2073 */
e2a75f88
AD
2074static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2075{
e2a75f88 2076 const char *chip_name;
c0a43457 2077 char fw_name[40];
e2a75f88
AD
2078 int err;
2079 const struct gpu_info_firmware_header_v1_0 *hdr;
2080
ab4fe3e1
HR
2081 adev->firmware.gpu_info_fw = NULL;
2082
72de33f8 2083 if (adev->mman.discovery_bin) {
cc375d8c
TY
2084 /*
2085 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 2086 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
2087 * when DAL no longer needs it.
2088 */
2089 if (adev->asic_type != CHIP_NAVI12)
2090 return 0;
258620d0
AD
2091 }
2092
e2a75f88 2093 switch (adev->asic_type) {
e2a75f88
AD
2094 default:
2095 return 0;
2096 case CHIP_VEGA10:
2097 chip_name = "vega10";
2098 break;
3f76dced
AD
2099 case CHIP_VEGA12:
2100 chip_name = "vega12";
2101 break;
2d2e5e7e 2102 case CHIP_RAVEN:
54f78a76 2103 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2104 chip_name = "raven2";
54f78a76 2105 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2106 chip_name = "picasso";
54c4d17e
FX
2107 else
2108 chip_name = "raven";
2d2e5e7e 2109 break;
65e60f6e
LM
2110 case CHIP_ARCTURUS:
2111 chip_name = "arcturus";
2112 break;
42b325e5
XY
2113 case CHIP_NAVI12:
2114 chip_name = "navi12";
2115 break;
e2a75f88
AD
2116 }
2117
2118 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2119 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2120 if (err) {
2121 dev_err(adev->dev,
b31d3063 2122 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2123 fw_name);
2124 goto out;
2125 }
2126
ab4fe3e1 2127 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2128 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2129
2130 switch (hdr->version_major) {
2131 case 1:
2132 {
2133 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2134 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2135 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2136
cc375d8c
TY
2137 /*
2138	 * Should be dropped when DAL no longer needs it.
2139 */
2140 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2141 goto parse_soc_bounding_box;
2142
b5ab16bf
AD
2143 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2144 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2145 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2146 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2147 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2148 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2149 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2150 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2151 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2152 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2153 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2154 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2155 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2156 adev->gfx.cu_info.max_waves_per_simd =
2157 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2158 adev->gfx.cu_info.max_scratch_slots_per_cu =
2159 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2160 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2161 if (hdr->version_minor >= 1) {
35c2e910
HZ
2162 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2163 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2164 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2165 adev->gfx.config.num_sc_per_sh =
2166 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2167 adev->gfx.config.num_packer_per_sc =
2168 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2169 }
ec51d3fa
XY
2170
2171parse_soc_bounding_box:
ec51d3fa
XY
2172 /*
2173	 * soc bounding box info is not integrated in discovery table,
258620d0 2174 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 2175 */
48321c3d
HW
2176 if (hdr->version_minor == 2) {
2177 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2178 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2179 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2180 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2181 }
e2a75f88
AD
2182 break;
2183 }
2184 default:
2185 dev_err(adev->dev,
2186 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2187 err = -EINVAL;
2188 goto out;
2189 }
2190out:
e2a75f88
AD
2191 return err;
2192}
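
/*
 * Illustrative example: for CHIP_VEGA10 the request above resolves to the
 * file "amdgpu/vega10_gpu_info.bin"; its v1.x payload is then used to fill
 * adev->gfx.config, while on Navi12 only the DC soc bounding box is kept.
 */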
2193
e3ecdffa
AD
2194/**
2195 * amdgpu_device_ip_early_init - run early init for hardware IPs
2196 *
2197 * @adev: amdgpu_device pointer
2198 *
2199 * Early initialization pass for hardware IPs. The hardware IPs that make
2200 * up each asic are discovered and each IP's early_init callback is run. This
2201 * is the first stage in initializing the asic.
2202 * Returns 0 on success, negative error code on failure.
2203 */
06ec9070 2204static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2205{
901e2be2
AD
2206 struct drm_device *dev = adev_to_drm(adev);
2207 struct pci_dev *parent;
aaa36a97 2208 int i, r;
ced69502 2209 bool total;
d38ceaf9 2210
483ef985 2211 amdgpu_device_enable_virtual_display(adev);
a6be7570 2212
00a979f3 2213 if (amdgpu_sriov_vf(adev)) {
00a979f3 2214 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2215 if (r)
2216 return r;
00a979f3
WS
2217 }
2218
d38ceaf9 2219 switch (adev->asic_type) {
33f34802
KW
2220#ifdef CONFIG_DRM_AMDGPU_SI
2221 case CHIP_VERDE:
2222 case CHIP_TAHITI:
2223 case CHIP_PITCAIRN:
2224 case CHIP_OLAND:
2225 case CHIP_HAINAN:
295d0daf 2226 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2227 r = si_set_ip_blocks(adev);
2228 if (r)
2229 return r;
2230 break;
2231#endif
a2e73f56
AD
2232#ifdef CONFIG_DRM_AMDGPU_CIK
2233 case CHIP_BONAIRE:
2234 case CHIP_HAWAII:
2235 case CHIP_KAVERI:
2236 case CHIP_KABINI:
2237 case CHIP_MULLINS:
e1ad2d53 2238 if (adev->flags & AMD_IS_APU)
a2e73f56 2239 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2240 else
2241 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2242
2243 r = cik_set_ip_blocks(adev);
2244 if (r)
2245 return r;
2246 break;
2247#endif
da87c30b
AD
2248 case CHIP_TOPAZ:
2249 case CHIP_TONGA:
2250 case CHIP_FIJI:
2251 case CHIP_POLARIS10:
2252 case CHIP_POLARIS11:
2253 case CHIP_POLARIS12:
2254 case CHIP_VEGAM:
2255 case CHIP_CARRIZO:
2256 case CHIP_STONEY:
2257 if (adev->flags & AMD_IS_APU)
2258 adev->family = AMDGPU_FAMILY_CZ;
2259 else
2260 adev->family = AMDGPU_FAMILY_VI;
2261
2262 r = vi_set_ip_blocks(adev);
2263 if (r)
2264 return r;
2265 break;
d38ceaf9 2266 default:
63352b7f
AD
2267 r = amdgpu_discovery_set_ip_blocks(adev);
2268 if (r)
2269 return r;
2270 break;
d38ceaf9
AD
2271 }
2272
901e2be2
AD
2273 if (amdgpu_has_atpx() &&
2274 (amdgpu_is_atpx_hybrid() ||
2275 amdgpu_has_atpx_dgpu_power_cntl()) &&
2276 ((adev->flags & AMD_IS_APU) == 0) &&
2277 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2278 adev->flags |= AMD_IS_PX;
2279
85ac2021
AD
2280 if (!(adev->flags & AMD_IS_APU)) {
2281 parent = pci_upstream_bridge(adev->pdev);
2282 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2283 }
901e2be2 2284
1884734a 2285
3b94fb10 2286 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2287 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2288 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2289 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2290 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2291
ced69502 2292 total = true;
d38ceaf9
AD
2293 for (i = 0; i < adev->num_ip_blocks; i++) {
2294 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2295 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2296 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2297 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2298 } else {
a1255107
AD
2299 if (adev->ip_blocks[i].version->funcs->early_init) {
2300 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2301 if (r == -ENOENT) {
a1255107 2302 adev->ip_blocks[i].status.valid = false;
2c1a2784 2303 } else if (r) {
a1255107
AD
2304 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2305 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2306 total = false;
2c1a2784 2307 } else {
a1255107 2308 adev->ip_blocks[i].status.valid = true;
2c1a2784 2309 }
974e6b64 2310 } else {
a1255107 2311 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2312 }
d38ceaf9 2313 }
21a249ca
AD
2314 /* get the vbios after the asic_funcs are set up */
2315 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2316 r = amdgpu_device_parse_gpu_info_fw(adev);
2317 if (r)
2318 return r;
2319
21a249ca
AD
2320 /* Read BIOS */
2321 if (!amdgpu_get_bios(adev))
2322 return -EINVAL;
2323
2324 r = amdgpu_atombios_init(adev);
2325 if (r) {
2326 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2327 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2328 return r;
2329 }
77eabc6f
PJZ
2330
2331			/* get pf2vf msg info at its earliest time */
2332 if (amdgpu_sriov_vf(adev))
2333 amdgpu_virt_init_data_exchange(adev);
2334
21a249ca 2335 }
d38ceaf9 2336 }
ced69502
ML
2337 if (!total)
2338 return -ENODEV;
d38ceaf9 2339
00fa4035 2340 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2341 adev->cg_flags &= amdgpu_cg_mask;
2342 adev->pg_flags &= amdgpu_pg_mask;
2343
d38ceaf9
AD
2344 return 0;
2345}
2346
0a4f2520
RZ
2347static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2348{
2349 int i, r;
2350
2351 for (i = 0; i < adev->num_ip_blocks; i++) {
2352 if (!adev->ip_blocks[i].status.sw)
2353 continue;
2354 if (adev->ip_blocks[i].status.hw)
2355 continue;
2356 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2357 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2358 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2359 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2360 if (r) {
2361 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2362 adev->ip_blocks[i].version->funcs->name, r);
2363 return r;
2364 }
2365 adev->ip_blocks[i].status.hw = true;
2366 }
2367 }
2368
2369 return 0;
2370}
2371
2372static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2373{
2374 int i, r;
2375
2376 for (i = 0; i < adev->num_ip_blocks; i++) {
2377 if (!adev->ip_blocks[i].status.sw)
2378 continue;
2379 if (adev->ip_blocks[i].status.hw)
2380 continue;
2381 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2382 if (r) {
2383 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2384 adev->ip_blocks[i].version->funcs->name, r);
2385 return r;
2386 }
2387 adev->ip_blocks[i].status.hw = true;
2388 }
2389
2390 return 0;
2391}
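
/*
 * Descriptive note: phase1 above only brings up COMMON, IH and, under
 * SR-IOV, PSP so that firmware loading can happen next; phase2 then runs
 * hw_init() for every remaining block that completed sw_init.
 */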
2392
7a3e0bb2
RZ
2393static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2394{
2395 int r = 0;
2396 int i;
80f41f84 2397 uint32_t smu_version;
7a3e0bb2
RZ
2398
2399 if (adev->asic_type >= CHIP_VEGA10) {
2400 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2401 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2402 continue;
2403
e3c1b071 2404 if (!adev->ip_blocks[i].status.sw)
2405 continue;
2406
482f0e53
ML
2407			/* no need to do the fw loading again if already done */
2408 if (adev->ip_blocks[i].status.hw == true)
2409 break;
2410
53b3f8f4 2411 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2412 r = adev->ip_blocks[i].version->funcs->resume(adev);
2413 if (r) {
2414 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2415 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2416 return r;
2417 }
2418 } else {
2419 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2420 if (r) {
2421 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2422 adev->ip_blocks[i].version->funcs->name, r);
2423 return r;
7a3e0bb2 2424 }
7a3e0bb2 2425 }
482f0e53
ML
2426
2427 adev->ip_blocks[i].status.hw = true;
2428 break;
7a3e0bb2
RZ
2429 }
2430 }
482f0e53 2431
8973d9ec
ED
2432 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2433 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2434
80f41f84 2435 return r;
7a3e0bb2
RZ
2436}
2437
5fd8518d
AG
2438static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2439{
2440 long timeout;
2441 int r, i;
2442
2443 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2444 struct amdgpu_ring *ring = adev->rings[i];
2445
2446 /* No need to setup the GPU scheduler for rings that don't need it */
2447 if (!ring || ring->no_scheduler)
2448 continue;
2449
2450 switch (ring->funcs->type) {
2451 case AMDGPU_RING_TYPE_GFX:
2452 timeout = adev->gfx_timeout;
2453 break;
2454 case AMDGPU_RING_TYPE_COMPUTE:
2455 timeout = adev->compute_timeout;
2456 break;
2457 case AMDGPU_RING_TYPE_SDMA:
2458 timeout = adev->sdma_timeout;
2459 break;
2460 default:
2461 timeout = adev->video_timeout;
2462 break;
2463 }
2464
2465 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
11f25c84 2466 ring->num_hw_submission, 0,
8ab62eda
JG
2467 timeout, adev->reset_domain->wq,
2468 ring->sched_score, ring->name,
2469 adev->dev);
5fd8518d
AG
2470 if (r) {
2471 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2472 ring->name);
2473 return r;
2474 }
2475 }
2476
d425c6f4
JZ
2477 amdgpu_xcp_update_partition_sched_list(adev);
2478
5fd8518d
AG
2479 return 0;
2480}
2481
2482
e3ecdffa
AD
2483/**
2484 * amdgpu_device_ip_init - run init for hardware IPs
2485 *
2486 * @adev: amdgpu_device pointer
2487 *
2488 * Main initialization pass for hardware IPs. The list of all the hardware
2489 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2490 * are run. sw_init initializes the software state associated with each IP
2491 * and hw_init initializes the hardware associated with each IP.
2492 * Returns 0 on success, negative error code on failure.
2493 */
06ec9070 2494static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2495{
2496 int i, r;
2497
c030f2e4 2498 r = amdgpu_ras_init(adev);
2499 if (r)
2500 return r;
2501
d38ceaf9 2502 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2503 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2504 continue;
a1255107 2505 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2506 if (r) {
a1255107
AD
2507 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2508 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2509 goto init_failed;
2c1a2784 2510 }
a1255107 2511 adev->ip_blocks[i].status.sw = true;
bfca0289 2512
c1c39032
AD
2513 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2514 /* need to do common hw init early so everything is set up for gmc */
2515 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2516 if (r) {
2517 DRM_ERROR("hw_init %d failed %d\n", i, r);
2518 goto init_failed;
2519 }
2520 adev->ip_blocks[i].status.hw = true;
2521 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2522 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2523 /* Try to reserve bad pages early */
2524 if (amdgpu_sriov_vf(adev))
2525 amdgpu_virt_exchange_data(adev);
2526
7ccfd79f 2527 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2528 if (r) {
7ccfd79f 2529 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2530 goto init_failed;
2c1a2784 2531 }
a1255107 2532 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2533 if (r) {
2534 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2535 goto init_failed;
2c1a2784 2536 }
06ec9070 2537 r = amdgpu_device_wb_init(adev);
2c1a2784 2538 if (r) {
06ec9070 2539 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2540 goto init_failed;
2c1a2784 2541 }
a1255107 2542 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2543
2544 /* right after GMC hw init, we create CSA */
8a1fbb4a 2545 if (amdgpu_mcbp) {
1e256e27 2546 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2547 AMDGPU_GEM_DOMAIN_VRAM |
2548 AMDGPU_GEM_DOMAIN_GTT,
2549 AMDGPU_CSA_SIZE);
2493664f
ML
2550 if (r) {
2551 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2552 goto init_failed;
2493664f
ML
2553 }
2554 }
d38ceaf9
AD
2555 }
2556 }
2557
c9ffa427 2558 if (amdgpu_sriov_vf(adev))
22c16d25 2559 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2560
533aed27
AG
2561 r = amdgpu_ib_pool_init(adev);
2562 if (r) {
2563 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2564 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2565 goto init_failed;
2566 }
2567
c8963ea4
RZ
2568 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2569 if (r)
72d3f592 2570 goto init_failed;
0a4f2520
RZ
2571
2572 r = amdgpu_device_ip_hw_init_phase1(adev);
2573 if (r)
72d3f592 2574 goto init_failed;
0a4f2520 2575
7a3e0bb2
RZ
2576 r = amdgpu_device_fw_loading(adev);
2577 if (r)
72d3f592 2578 goto init_failed;
7a3e0bb2 2579
0a4f2520
RZ
2580 r = amdgpu_device_ip_hw_init_phase2(adev);
2581 if (r)
72d3f592 2582 goto init_failed;
d38ceaf9 2583
121a2bc6
AG
2584 /*
2585 * retired pages will be loaded from eeprom and reserved here,
2586 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2587 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2588	 * for I2C communication which is only true at this point.
b82e65a9
GC
2589 *
2590 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2591 * failure from bad gpu situation and stop amdgpu init process
2592 * accordingly. For other failed cases, it will still release all
2593 * the resource and print error message, rather than returning one
2594 * negative value to upper level.
121a2bc6
AG
2595 *
2596 * Note: theoretically, this should be called before all vram allocations
2597	 * to protect retired pages from being abused
2598 */
b82e65a9
GC
2599 r = amdgpu_ras_recovery_init(adev);
2600 if (r)
2601 goto init_failed;
121a2bc6 2602
cfbb6b00
AG
2603 /**
2604 * In case of XGMI grab extra reference for reset domain for this device
2605 */
a4c63caf 2606 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2607 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2608 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2609 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2610
dfd0287b
LH
2611 if (WARN_ON(!hive)) {
2612 r = -ENOENT;
2613 goto init_failed;
2614 }
2615
46c67660 2616 if (!hive->reset_domain ||
2617 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2618 r = -ENOENT;
2619 amdgpu_put_xgmi_hive(hive);
2620 goto init_failed;
2621 }
2622
2623 /* Drop the early temporary reset domain we created for device */
2624 amdgpu_reset_put_reset_domain(adev->reset_domain);
2625 adev->reset_domain = hive->reset_domain;
9dfa4860 2626 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2627 }
a4c63caf
AG
2628 }
2629 }
2630
5fd8518d
AG
2631 r = amdgpu_device_init_schedulers(adev);
2632 if (r)
2633 goto init_failed;
e3c1b071 2634
2635 /* Don't init kfd if whole hive need to be reset during init */
c004d44e 2636 if (!adev->gmc.xgmi.pending_reset)
e3c1b071 2637 amdgpu_amdkfd_device_init(adev);
c6332b97 2638
bd607166
KR
2639 amdgpu_fru_get_product_info(adev);
2640
72d3f592 2641init_failed:
c6332b97 2642
72d3f592 2643 return r;
d38ceaf9
AD
2644}
2645
e3ecdffa
AD
2646/**
2647 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2648 *
2649 * @adev: amdgpu_device pointer
2650 *
2651 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2652 * this function before a GPU reset. If the value is retained after a
2653 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2654 */
06ec9070 2655static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2656{
2657 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2658}
2659
e3ecdffa
AD
2660/**
2661 * amdgpu_device_check_vram_lost - check if vram is valid
2662 *
2663 * @adev: amdgpu_device pointer
2664 *
2665 * Checks the reset magic value written to the gart pointer in VRAM.
2666 * The driver calls this after a GPU reset to see if the contents of
2667 * VRAM is lost or not.
2668 * Returns true if vram is lost, false if not.
2669 */
06ec9070 2670static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2671{
dadce777
EQ
2672 if (memcmp(adev->gart.ptr, adev->reset_magic,
2673 AMDGPU_RESET_MAGIC_NUM))
2674 return true;
2675
53b3f8f4 2676 if (!amdgpu_in_reset(adev))
dadce777
EQ
2677 return false;
2678
2679 /*
2680 * For all ASICs with baco/mode1 reset, the VRAM is
2681 * always assumed to be lost.
2682 */
2683 switch (amdgpu_asic_reset_method(adev)) {
2684 case AMD_RESET_METHOD_BACO:
2685 case AMD_RESET_METHOD_MODE1:
2686 return true;
2687 default:
2688 return false;
2689 }
0c49e0b8
CZ
2690}
2691
e3ecdffa 2692/**
1112a46b 2693 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2694 *
2695 * @adev: amdgpu_device pointer
b8b72130 2696 * @state: clockgating state (gate or ungate)
e3ecdffa 2697 *
e3ecdffa 2698 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2699 * set_clockgating_state callbacks are run.
2700 * During late init this pass enables clockgating for hardware IPs;
2701 * during fini or suspend it disables clockgating for hardware IPs.
e3ecdffa
AD
2702 * Returns 0 on success, negative error code on failure.
2703 */
fdd34271 2704
5d89bb2d
LL
2705int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2706 enum amd_clockgating_state state)
d38ceaf9 2707{
1112a46b 2708 int i, j, r;
d38ceaf9 2709
4a2ba394
SL
2710 if (amdgpu_emu_mode == 1)
2711 return 0;
2712
1112a46b
RZ
2713 for (j = 0; j < adev->num_ip_blocks; j++) {
2714 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2715 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2716 continue;
47198eb7 2717 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2718 if (adev->in_s0ix &&
47198eb7
AD
2719 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2720 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2721 continue;
4a446d55 2722 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2723 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2724 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2725 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2726 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2727 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2728 /* enable clockgating to save power */
a1255107 2729 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2730 state);
4a446d55
AD
2731 if (r) {
2732 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2733 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2734 return r;
2735 }
b0b00ff1 2736 }
d38ceaf9 2737 }
06b18f61 2738
c9f96fd5
RZ
2739 return 0;
2740}
2741
5d89bb2d
LL
2742int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2743 enum amd_powergating_state state)
c9f96fd5 2744{
1112a46b 2745 int i, j, r;
06b18f61 2746
c9f96fd5
RZ
2747 if (amdgpu_emu_mode == 1)
2748 return 0;
2749
1112a46b
RZ
2750 for (j = 0; j < adev->num_ip_blocks; j++) {
2751 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2752 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2753 continue;
47198eb7 2754 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2755 if (adev->in_s0ix &&
47198eb7
AD
2756 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2757 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2758 continue;
c9f96fd5
RZ
2759 /* skip CG for VCE/UVD, it's handled specially */
2760 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2761 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2762 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2763 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2764 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2765 /* enable powergating to save power */
2766 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2767 state);
c9f96fd5
RZ
2768 if (r) {
2769 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2770 adev->ip_blocks[i].version->funcs->name, r);
2771 return r;
2772 }
2773 }
2774 }
2dc80b00
S
2775 return 0;
2776}
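
/*
 * Descriptive note: late init calls these helpers with AMD_CG_STATE_GATE /
 * AMD_PG_STATE_GATE once every IP is up, while the suspend and fini paths
 * call them with the UNGATE states; the "adev->num_ip_blocks - j - 1" index
 * flip above makes ungating walk the IP list in reverse order.
 */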
2777
beff74bc
AD
2778static int amdgpu_device_enable_mgpu_fan_boost(void)
2779{
2780 struct amdgpu_gpu_instance *gpu_ins;
2781 struct amdgpu_device *adev;
2782 int i, ret = 0;
2783
2784 mutex_lock(&mgpu_info.mutex);
2785
2786 /*
2787 * MGPU fan boost feature should be enabled
2788 * only when there are two or more dGPUs in
2789 * the system
2790 */
2791 if (mgpu_info.num_dgpu < 2)
2792 goto out;
2793
2794 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2795 gpu_ins = &(mgpu_info.gpu_ins[i]);
2796 adev = gpu_ins->adev;
2797 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2798 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2799 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2800 if (ret)
2801 break;
2802
2803 gpu_ins->mgpu_fan_enabled = 1;
2804 }
2805 }
2806
2807out:
2808 mutex_unlock(&mgpu_info.mutex);
2809
2810 return ret;
2811}
2812
e3ecdffa
AD
2813/**
2814 * amdgpu_device_ip_late_init - run late init for hardware IPs
2815 *
2816 * @adev: amdgpu_device pointer
2817 *
2818 * Late initialization pass for hardware IPs. The list of all the hardware
2819 * IPs that make up the asic is walked and the late_init callbacks are run.
2820 * late_init covers any special initialization that an IP requires
2821 * after all of the have been initialized or something that needs to happen
2822 * late in the init process.
2823 * Returns 0 on success, negative error code on failure.
2824 */
06ec9070 2825static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2826{
60599a03 2827 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2828 int i = 0, r;
2829
2830 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2831 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2832 continue;
2833 if (adev->ip_blocks[i].version->funcs->late_init) {
2834 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2835 if (r) {
2836 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2837 adev->ip_blocks[i].version->funcs->name, r);
2838 return r;
2839 }
2dc80b00 2840 }
73f847db 2841 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2842 }
2843
867e24ca 2844 r = amdgpu_ras_late_init(adev);
2845 if (r) {
2846 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2847 return r;
2848 }
2849
a891d239
DL
2850 amdgpu_ras_set_error_query_ready(adev, true);
2851
1112a46b
RZ
2852 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2853 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2854
06ec9070 2855 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2856
beff74bc
AD
2857 r = amdgpu_device_enable_mgpu_fan_boost();
2858 if (r)
2859 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2860
4da8b639 2861	/* For passthrough configuration on arcturus and aldebaran, enable special handling for SBR */
47fc644f
SS
2862 if (amdgpu_passthrough(adev) &&
2863 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2864 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2865 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2866
2867 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2868 mutex_lock(&mgpu_info.mutex);
2869
2870 /*
2871 * Reset device p-state to low as this was booted with high.
2872 *
2873 * This should be performed only after all devices from the same
2874 * hive get initialized.
2875 *
2876		 * However, the number of devices in the hive is unknown in advance,
2877		 * as they are counted one by one during device initialization.
2878 *
2879 * So, we wait for all XGMI interlinked devices initialized.
2880 * This may bring some delays as those devices may come from
2881 * different hives. But that should be OK.
2882 */
2883 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2884 for (i = 0; i < mgpu_info.num_gpu; i++) {
2885 gpu_instance = &(mgpu_info.gpu_ins[i]);
2886 if (gpu_instance->adev->flags & AMD_IS_APU)
2887 continue;
2888
d84a430d
JK
2889 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2890 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2891 if (r) {
2892 DRM_ERROR("pstate setting failed (%d).\n", r);
2893 break;
2894 }
2895 }
2896 }
2897
2898 mutex_unlock(&mgpu_info.mutex);
2899 }
2900
d38ceaf9
AD
2901 return 0;
2902}
2903
613aa3ea
LY
2904/**
2905 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2906 *
2907 * @adev: amdgpu_device pointer
2908 *
2909 * For ASICs that need to disable SMC first
2910 */
2911static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2912{
2913 int i, r;
2914
2915 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2916 return;
2917
2918 for (i = 0; i < adev->num_ip_blocks; i++) {
2919 if (!adev->ip_blocks[i].status.hw)
2920 continue;
2921 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2922 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2923 /* XXX handle errors */
2924 if (r) {
2925 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2926 adev->ip_blocks[i].version->funcs->name, r);
2927 }
2928 adev->ip_blocks[i].status.hw = false;
2929 break;
2930 }
2931 }
2932}
2933
e9669fb7 2934static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2935{
2936 int i, r;
2937
e9669fb7
AG
2938 for (i = 0; i < adev->num_ip_blocks; i++) {
2939 if (!adev->ip_blocks[i].version->funcs->early_fini)
2940 continue;
5278a159 2941
e9669fb7
AG
2942 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2943 if (r) {
2944 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2945 adev->ip_blocks[i].version->funcs->name, r);
2946 }
2947 }
c030f2e4 2948
05df1f01 2949 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2950 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2951
7270e895
TY
2952 amdgpu_amdkfd_suspend(adev, false);
2953
613aa3ea
LY
2954	/* Workaround for ASICs that need to disable SMC first */
2955 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2956
d38ceaf9 2957 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2958 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2959 continue;
8201a67a 2960
a1255107 2961 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2962 /* XXX handle errors */
2c1a2784 2963 if (r) {
a1255107
AD
2964 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2965 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2966 }
8201a67a 2967
a1255107 2968 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2969 }
2970
6effad8a
GC
2971 if (amdgpu_sriov_vf(adev)) {
2972 if (amdgpu_virt_release_full_gpu(adev, false))
2973 DRM_ERROR("failed to release exclusive mode on fini\n");
2974 }
2975
e9669fb7
AG
2976 return 0;
2977}
2978
2979/**
2980 * amdgpu_device_ip_fini - run fini for hardware IPs
2981 *
2982 * @adev: amdgpu_device pointer
2983 *
2984 * Main teardown pass for hardware IPs. The list of all the hardware
2985 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2986 * are run. hw_fini tears down the hardware associated with each IP
2987 * and sw_fini tears down any software state associated with each IP.
2988 * Returns 0 on success, negative error code on failure.
2989 */
2990static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2991{
2992 int i, r;
2993
2994 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2995 amdgpu_virt_release_ras_err_handler_data(adev);
2996
e9669fb7
AG
2997 if (adev->gmc.xgmi.num_physical_nodes > 1)
2998 amdgpu_xgmi_remove_device(adev);
2999
c004d44e 3000 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 3001
d38ceaf9 3002 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3003 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 3004 continue;
c12aba3a
ML
3005
3006 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 3007 amdgpu_ucode_free_bo(adev);
1e256e27 3008 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 3009 amdgpu_device_wb_fini(adev);
7ccfd79f 3010 amdgpu_device_mem_scratch_fini(adev);
533aed27 3011 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
3012 }
3013
a1255107 3014 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 3015 /* XXX handle errors */
2c1a2784 3016 if (r) {
a1255107
AD
3017 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3018 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3019 }
a1255107
AD
3020 adev->ip_blocks[i].status.sw = false;
3021 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
3022 }
3023
a6dcfd9c 3024 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3025 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 3026 continue;
a1255107
AD
3027 if (adev->ip_blocks[i].version->funcs->late_fini)
3028 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3029 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
3030 }
3031
c030f2e4 3032 amdgpu_ras_fini(adev);
3033
d38ceaf9
AD
3034 return 0;
3035}
3036
e3ecdffa 3037/**
beff74bc 3038 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 3039 *
1112a46b 3040 * @work: work_struct.
e3ecdffa 3041 */
beff74bc 3042static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
3043{
3044 struct amdgpu_device *adev =
beff74bc 3045 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
3046 int r;
3047
3048 r = amdgpu_ib_ring_tests(adev);
3049 if (r)
3050 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
3051}
3052
1e317b99
RZ
3053static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3054{
3055 struct amdgpu_device *adev =
3056 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3057
90a92662
MD
3058 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3059 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3060
3061 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3062 adev->gfx.gfx_off_state = true;
1e317b99
RZ
3063}
3064
e3ecdffa 3065/**
e7854a03 3066 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3067 *
3068 * @adev: amdgpu_device pointer
3069 *
3070 * Main suspend function for hardware IPs. The list of all the hardware
3071 * IPs that make up the asic is walked, clockgating is disabled and the
3072 * suspend callbacks are run. suspend puts the hardware and software state
3073 * in each IP into a state suitable for suspend.
3074 * Returns 0 on success, negative error code on failure.
3075 */
e7854a03
AD
3076static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3077{
3078 int i, r;
3079
50ec83f0
AD
3080 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3081 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3082
b31d6ada
EQ
3083 /*
3084 * Per PMFW team's suggestion, driver needs to handle gfxoff
3085 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3086 * scenario. Add the missing df cstate disablement here.
3087 */
3088 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3089 dev_warn(adev->dev, "Failed to disallow df cstate");
3090
e7854a03
AD
3091 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3092 if (!adev->ip_blocks[i].status.valid)
3093 continue;
2b9f7848 3094
e7854a03 3095 /* displays are handled separately */
2b9f7848
ND
3096 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3097 continue;
3098
3099 /* XXX handle errors */
3100 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3101 /* XXX handle errors */
3102 if (r) {
3103 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3104 adev->ip_blocks[i].version->funcs->name, r);
3105 return r;
e7854a03 3106 }
2b9f7848
ND
3107
3108 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3109 }
3110
e7854a03
AD
3111 return 0;
3112}
3113
3114/**
3115 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3116 *
3117 * @adev: amdgpu_device pointer
3118 *
3119 * Main suspend function for hardware IPs. The list of all the hardware
3120 * IPs that make up the asic is walked, clockgating is disabled and the
3121 * suspend callbacks are run. suspend puts the hardware and software state
3122 * in each IP into a state suitable for suspend.
3123 * Returns 0 on success, negative error code on failure.
3124 */
3125static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3126{
3127 int i, r;
3128
557f42a2 3129 if (adev->in_s0ix)
bc143d8b 3130 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3131
d38ceaf9 3132 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3133 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3134 continue;
e7854a03
AD
3135 /* displays are handled in phase1 */
3136 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3137 continue;
bff77e86
LM
3138 /* PSP lost connection when err_event_athub occurs */
3139 if (amdgpu_ras_intr_triggered() &&
3140 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3141 adev->ip_blocks[i].status.hw = false;
3142 continue;
3143 }
e3c1b071 3144
3145 /* skip unnecessary suspend if we do not initialize them yet */
3146 if (adev->gmc.xgmi.pending_reset &&
3147 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3148 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3149 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3150 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3151 adev->ip_blocks[i].status.hw = false;
3152 continue;
3153 }
557f42a2 3154
afa6646b 3155 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3156 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3157 * like at runtime. PSP is also part of the always on hardware
3158 * so no need to suspend it.
3159 */
557f42a2 3160 if (adev->in_s0ix &&
32ff160d 3161 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3162 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3163 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3164 continue;
3165
2a7798ea
AD
3166 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3167 if (adev->in_s0ix &&
3168 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3169 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3170 continue;
3171
e11c7750
TH
3172 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3173 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3174 * from this location and RLC Autoload automatically also gets loaded
3175 * from here based on PMFW -> PSP message during re-init sequence.
3176		 * Therefore, the psp suspend & resume should be skipped to avoid destroying
3177		 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3178 */
3179 if (amdgpu_in_reset(adev) &&
3180 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3181 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3182 continue;
3183
d38ceaf9 3184 /* XXX handle errors */
a1255107 3185 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3186 /* XXX handle errors */
2c1a2784 3187 if (r) {
a1255107
AD
3188 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3189 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3190 }
876923fb 3191 adev->ip_blocks[i].status.hw = false;
a3a09142 3192 /* handle putting the SMC in the appropriate state */
47fc644f 3193 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3194 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3195 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3196 if (r) {
3197 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3198 adev->mp1_state, r);
3199 return r;
3200 }
a3a09142
AD
3201 }
3202 }
d38ceaf9
AD
3203 }
3204
3205 return 0;
3206}
3207
e7854a03
AD
3208/**
3209 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3210 *
3211 * @adev: amdgpu_device pointer
3212 *
3213 * Main suspend function for hardware IPs. The list of all the hardware
3214 * IPs that make up the asic is walked, clockgating is disabled and the
3215 * suspend callbacks are run. suspend puts the hardware and software state
3216 * in each IP into a state suitable for suspend.
3217 * Returns 0 on success, negative error code on failure.
3218 */
3219int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3220{
3221 int r;
3222
3c73683c
JC
3223 if (amdgpu_sriov_vf(adev)) {
3224 amdgpu_virt_fini_data_exchange(adev);
e7819644 3225 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3226 }
e7819644 3227
e7854a03
AD
3228 r = amdgpu_device_ip_suspend_phase1(adev);
3229 if (r)
3230 return r;
3231 r = amdgpu_device_ip_suspend_phase2(adev);
3232
e7819644
YT
3233 if (amdgpu_sriov_vf(adev))
3234 amdgpu_virt_release_full_gpu(adev, false);
3235
e7854a03
AD
3236 return r;
3237}
3238
06ec9070 3239static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3240{
3241 int i, r;
3242
2cb681b6 3243 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3244 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3245 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3246 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3247 AMD_IP_BLOCK_TYPE_IH,
3248 };
a90ad3c2 3249
95ea3dbc 3250 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3251 int j;
3252 struct amdgpu_ip_block *block;
a90ad3c2 3253
4cd2a96d
J
3254 block = &adev->ip_blocks[i];
3255 block->status.hw = false;
2cb681b6 3256
4cd2a96d 3257 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3258
4cd2a96d 3259 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3260 !block->status.valid)
3261 continue;
3262
3263 r = block->version->funcs->hw_init(adev);
0aaeefcc 3264			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
3265 if (r)
3266 return r;
482f0e53 3267 block->status.hw = true;
a90ad3c2
ML
3268 }
3269 }
3270
3271 return 0;
3272}
3273
06ec9070 3274static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3275{
3276 int i, r;
3277
2cb681b6
ML
3278 static enum amd_ip_block_type ip_order[] = {
3279 AMD_IP_BLOCK_TYPE_SMC,
3280 AMD_IP_BLOCK_TYPE_DCE,
3281 AMD_IP_BLOCK_TYPE_GFX,
3282 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3283 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3284 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3285 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3286 AMD_IP_BLOCK_TYPE_VCN,
3287 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3288 };
a90ad3c2 3289
2cb681b6
ML
3290 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3291 int j;
3292 struct amdgpu_ip_block *block;
a90ad3c2 3293
2cb681b6
ML
3294 for (j = 0; j < adev->num_ip_blocks; j++) {
3295 block = &adev->ip_blocks[j];
3296
3297 if (block->version->type != ip_order[i] ||
482f0e53
ML
3298 !block->status.valid ||
3299 block->status.hw)
2cb681b6
ML
3300 continue;
3301
895bd048
JZ
3302 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3303 r = block->version->funcs->resume(adev);
3304 else
3305 r = block->version->funcs->hw_init(adev);
3306
0aaeefcc 3307			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
3308 if (r)
3309 return r;
482f0e53 3310 block->status.hw = true;
a90ad3c2
ML
3311 }
3312 }
3313
3314 return 0;
3315}
3316
e3ecdffa
AD
3317/**
3318 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3319 *
3320 * @adev: amdgpu_device pointer
3321 *
3322 * First resume function for hardware IPs. The list of all the hardware
3323 * IPs that make up the asic is walked and the resume callbacks are run for
3324 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3325 * after a suspend and updates the software state as necessary. This
3326 * function is also used for restoring the GPU after a GPU reset.
3327 * Returns 0 on success, negative error code on failure.
3328 */
06ec9070 3329static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3330{
3331 int i, r;
3332
a90ad3c2 3333 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3334 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3335 continue;
a90ad3c2 3336 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3337 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3338 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3339 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3340
fcf0649f
CZ
3341 r = adev->ip_blocks[i].version->funcs->resume(adev);
3342 if (r) {
3343 DRM_ERROR("resume of IP block <%s> failed %d\n",
3344 adev->ip_blocks[i].version->funcs->name, r);
3345 return r;
3346 }
482f0e53 3347 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3348 }
3349 }
3350
3351 return 0;
3352}
3353
e3ecdffa
AD
3354/**
3355 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3356 *
3357 * @adev: amdgpu_device pointer
3358 *
3359 * First resume function for hardware IPs. The list of all the hardware
3360 * IPs that make up the asic is walked and the resume callbacks are run for
3361 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3362 * functional state after a suspend and updates the software state as
3363 * necessary. This function is also used for restoring the GPU after a GPU
3364 * reset.
3365 * Returns 0 on success, negative error code on failure.
3366 */
06ec9070 3367static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3368{
3369 int i, r;
3370
3371 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3372 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3373 continue;
fcf0649f 3374 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3375 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3376 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3377 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3378 continue;
a1255107 3379 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3380 if (r) {
a1255107
AD
3381 DRM_ERROR("resume of IP block <%s> failed %d\n",
3382 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3383 return r;
2c1a2784 3384 }
482f0e53 3385 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3386 }
3387
3388 return 0;
3389}
3390
e3ecdffa
AD
3391/**
3392 * amdgpu_device_ip_resume - run resume for hardware IPs
3393 *
3394 * @adev: amdgpu_device pointer
3395 *
3396 * Main resume function for hardware IPs. The hardware IPs
 3397 * are split into two resume functions because they are
 3398 * also used in recovering from a GPU reset and some additional
 3399 * steps need to be taken between them. In this case (S3/S4) they are
3400 * run sequentially.
3401 * Returns 0 on success, negative error code on failure.
3402 */
06ec9070 3403static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3404{
3405 int r;
3406
f2206741
AL
3407 if (!adev->in_s0ix) {
3408 r = amdgpu_amdkfd_resume_iommu(adev);
3409 if (r)
3410 return r;
3411 }
9cec53c1 3412
06ec9070 3413 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3414 if (r)
3415 return r;
7a3e0bb2
RZ
3416
3417 r = amdgpu_device_fw_loading(adev);
3418 if (r)
3419 return r;
3420
06ec9070 3421 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3422
3423 return r;
3424}
3425
e3ecdffa
AD
3426/**
3427 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3428 *
3429 * @adev: amdgpu_device pointer
3430 *
3431 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3432 */
4e99a44e 3433static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3434{
6867e1b5
ML
3435 if (amdgpu_sriov_vf(adev)) {
3436 if (adev->is_atom_fw) {
58ff791a 3437 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3438 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3439 } else {
3440 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3441 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3442 }
3443
3444 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3445 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3446 }
048765ad
AR
3447}
3448
e3ecdffa
AD
3449/**
3450 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3451 *
3452 * @asic_type: AMD asic type
3453 *
 3454 * Check if there is DC (new modesetting infrastructure) support for an asic.
3455 * returns true if DC has support, false if not.
3456 */
4562236b
HW
3457bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3458{
3459 switch (asic_type) {
0637d417
AD
3460#ifdef CONFIG_DRM_AMDGPU_SI
3461 case CHIP_HAINAN:
3462#endif
3463 case CHIP_TOPAZ:
3464 /* chips with no display hardware */
3465 return false;
4562236b 3466#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3467 case CHIP_TAHITI:
3468 case CHIP_PITCAIRN:
3469 case CHIP_VERDE:
3470 case CHIP_OLAND:
2d32ffd6
AD
3471 /*
3472 * We have systems in the wild with these ASICs that require
3473 * LVDS and VGA support which is not supported with DC.
3474 *
3475 * Fallback to the non-DC driver here by default so as not to
3476 * cause regressions.
3477 */
3478#if defined(CONFIG_DRM_AMD_DC_SI)
3479 return amdgpu_dc > 0;
3480#else
3481 return false;
64200c46 3482#endif
4562236b 3483 case CHIP_BONAIRE:
0d6fbccb 3484 case CHIP_KAVERI:
367e6687
AD
3485 case CHIP_KABINI:
3486 case CHIP_MULLINS:
d9fda248
HW
3487 /*
3488 * We have systems in the wild with these ASICs that require
b5a0168e 3489 * VGA support which is not supported with DC.
d9fda248
HW
3490 *
3491 * Fallback to the non-DC driver here by default so as not to
3492 * cause regressions.
3493 */
3494 return amdgpu_dc > 0;
f7f12b25 3495 default:
fd187853 3496 return amdgpu_dc != 0;
f7f12b25 3497#else
4562236b 3498 default:
93b09a9a 3499 if (amdgpu_dc > 0)
044a48f4 3500 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3501 "but isn't supported by ASIC, ignoring\n");
4562236b 3502 return false;
f7f12b25 3503#endif
4562236b
HW
3504 }
3505}
3506
3507/**
3508 * amdgpu_device_has_dc_support - check if dc is supported
3509 *
982a820b 3510 * @adev: amdgpu_device pointer
4562236b
HW
3511 *
3512 * Returns true for supported, false for not supported
3513 */
3514bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3515{
25263da3 3516 if (adev->enable_virtual_display ||
abaf210c 3517 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3518 return false;
3519
4562236b
HW
3520 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3521}
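/*
 * Illustrative note (the option name "amdgpu.dc" is an assumption based on
 * the amdgpu_dc variable checked above): on ASICs where the checks above
 * fall back to the legacy display path by default, booting with amdgpu.dc=1
 * opts into DC, while amdgpu.dc=0 keeps the non-DC path even on ASICs where
 * DC would normally be used.
 */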
3522
d4535e2c
AG
3523static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3524{
3525 struct amdgpu_device *adev =
3526 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3527 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3528
c6a6e2db
AG
3529 /* It's a bug to not have a hive within this function */
3530 if (WARN_ON(!hive))
3531 return;
3532
3533 /*
3534 * Use task barrier to synchronize all xgmi reset works across the
3535 * hive. task_barrier_enter and task_barrier_exit will block
3536 * until all the threads running the xgmi reset works reach
3537 * those points. task_barrier_full will do both blocks.
3538 */
3539 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3540
3541 task_barrier_enter(&hive->tb);
4a580877 3542 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3543
3544 if (adev->asic_reset_res)
3545 goto fail;
3546
3547 task_barrier_exit(&hive->tb);
4a580877 3548 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3549
3550 if (adev->asic_reset_res)
3551 goto fail;
43c4d576 3552
5e67bba3 3553 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3554 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3555 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3556 } else {
3557
3558 task_barrier_full(&hive->tb);
3559 adev->asic_reset_res = amdgpu_asic_reset(adev);
3560 }
ce316fa5 3561
c6a6e2db 3562fail:
d4535e2c 3563 if (adev->asic_reset_res)
fed184e9 3564 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3565 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3566 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3567}
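/*
 * Note (illustrative): during a hive-wide reset this work item is queued
 * once per device in the hive (see the queue_work() call on xgmi_reset_work
 * in amdgpu_do_asic_reset() further below), so the task barrier entered
 * above only releases once every node has reached the same point, keeping
 * BACO enter/exit in lockstep across the hive.
 */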
3568
71f98027
AD
3569static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3570{
3571 char *input = amdgpu_lockup_timeout;
3572 char *timeout_setting = NULL;
3573 int index = 0;
3574 long timeout;
3575 int ret = 0;
3576
3577 /*
67387dfe
AD
 3578 * By default the timeout for non-compute jobs is 10000
 3579 * and 60000 for compute jobs.
 71f98027 3580 * In SR-IOV or passthrough mode, the timeout for compute
 b7b2a316 3581 * jobs is 60000 by default.
71f98027
AD
3582 */
3583 adev->gfx_timeout = msecs_to_jiffies(10000);
3584 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3585 if (amdgpu_sriov_vf(adev))
3586 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3587 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3588 else
67387dfe 3589 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3590
f440ff44 3591 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3592 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3593 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3594 ret = kstrtol(timeout_setting, 0, &timeout);
3595 if (ret)
3596 return ret;
3597
3598 if (timeout == 0) {
3599 index++;
3600 continue;
3601 } else if (timeout < 0) {
3602 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3603 dev_warn(adev->dev, "lockup timeout disabled");
3604 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3605 } else {
3606 timeout = msecs_to_jiffies(timeout);
3607 }
3608
3609 switch (index++) {
3610 case 0:
3611 adev->gfx_timeout = timeout;
3612 break;
3613 case 1:
3614 adev->compute_timeout = timeout;
3615 break;
3616 case 2:
3617 adev->sdma_timeout = timeout;
3618 break;
3619 case 3:
3620 adev->video_timeout = timeout;
3621 break;
3622 default:
3623 break;
3624 }
3625 }
3626 /*
3627 * There is only one value specified and
3628 * it should apply to all non-compute jobs.
3629 */
bcccee89 3630 if (index == 1) {
71f98027 3631 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3632 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3633 adev->compute_timeout = adev->gfx_timeout;
3634 }
71f98027
AD
3635 }
3636
3637 return ret;
3638}
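/*
 * Illustrative example (module parameter name assumed from the
 * amdgpu_lockup_timeout string parsed above): a boot option such as
 *
 *     amdgpu.lockup_timeout=10000,60000,10000,10000
 *
 * sets the gfx, compute, sdma and video job timeouts (in ms) in that order.
 * A single value applies to all non-compute jobs (and also to compute jobs
 * under SR-IOV/passthrough), 0 keeps the default, and a negative value
 * disables the timeout entirely.
 */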
d4535e2c 3639
4a74c38c
PY
3640/**
3641 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3642 *
3643 * @adev: amdgpu_device pointer
3644 *
 3645 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in pass-through mode
3646 */
3647static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3648{
3649 struct iommu_domain *domain;
3650
3651 domain = iommu_get_domain_for_dev(adev->dev);
3652 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3653 adev->ram_is_direct_mapped = true;
3654}
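/*
 * Example (illustrative): on a system booted without an IOMMU, or with the
 * IOMMU in identity/pass-through mode (e.g. iommu=pt on x86), the domain
 * check above leaves ram_is_direct_mapped set, and the driver can treat
 * system RAM as directly addressable by the device.
 */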
3655
77f3a5cd
ND
3656static const struct attribute *amdgpu_dev_attributes[] = {
3657 &dev_attr_product_name.attr,
3658 &dev_attr_product_number.attr,
3659 &dev_attr_serial_number.attr,
3660 &dev_attr_pcie_replay_count.attr,
3661 NULL
3662};
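/*
 * Note (illustrative, exact path assumed): these attributes are created on
 * the PCI device's kobject later in amdgpu_device_init() via
 * sysfs_create_files(), so they typically appear as e.g.
 * /sys/class/drm/card0/device/serial_number and .../pcie_replay_count.
 */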
3663
d38ceaf9
AD
3664/**
3665 * amdgpu_device_init - initialize the driver
3666 *
3667 * @adev: amdgpu_device pointer
d38ceaf9
AD
3668 * @flags: driver flags
3669 *
3670 * Initializes the driver info and hw (all asics).
3671 * Returns 0 for success or an error on failure.
3672 * Called at driver startup.
3673 */
3674int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3675 uint32_t flags)
3676{
8aba21b7
LT
3677 struct drm_device *ddev = adev_to_drm(adev);
3678 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3679 int r, i;
b98c6299 3680 bool px = false;
95844d20 3681 u32 max_MBps;
59e9fff1 3682 int tmp;
d38ceaf9
AD
3683
3684 adev->shutdown = false;
d38ceaf9 3685 adev->flags = flags;
4e66d7d2
YZ
3686
3687 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3688 adev->asic_type = amdgpu_force_asic_type;
3689 else
3690 adev->asic_type = flags & AMD_ASIC_MASK;
3691
d38ceaf9 3692 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3693 if (amdgpu_emu_mode == 1)
8bdab6bb 3694 adev->usec_timeout *= 10;
770d13b1 3695 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3696 adev->accel_working = false;
3697 adev->num_rings = 0;
68ce8b24 3698 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3699 adev->mman.buffer_funcs = NULL;
3700 adev->mman.buffer_funcs_ring = NULL;
3701 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3702 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3703 adev->gmc.gmc_funcs = NULL;
7bd939d0 3704 adev->harvest_ip_mask = 0x0;
f54d1867 3705 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3706 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3707
3708 adev->smc_rreg = &amdgpu_invalid_rreg;
3709 adev->smc_wreg = &amdgpu_invalid_wreg;
3710 adev->pcie_rreg = &amdgpu_invalid_rreg;
3711 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3712 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3713 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3714 adev->pciep_rreg = &amdgpu_invalid_rreg;
3715 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3716 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3717 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3718 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3719 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3720 adev->didt_rreg = &amdgpu_invalid_rreg;
3721 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3722 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3723 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3724 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3725 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3726
3e39ab90
AD
3727 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3728 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3729 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3730
 3731 /* mutex initialization is all done here so we
 3732 * can recall functions without locking issues */
0e5ca0d1 3733 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3734 mutex_init(&adev->pm.mutex);
3735 mutex_init(&adev->gfx.gpu_clock_mutex);
3736 mutex_init(&adev->srbm_mutex);
b8866c26 3737 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3738 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3739 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3740 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3741 mutex_init(&adev->mn_lock);
e23b74aa 3742 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3743 hash_init(adev->mn_hash);
32eaeae0 3744 mutex_init(&adev->psp.mutex);
bd052211 3745 mutex_init(&adev->notifier_lock);
8cda7a4f 3746 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3747 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3748
ab3b9de6 3749 amdgpu_device_init_apu_flags(adev);
9f6a7857 3750
912dfc84
EQ
3751 r = amdgpu_device_check_arguments(adev);
3752 if (r)
3753 return r;
d38ceaf9 3754
d38ceaf9
AD
3755 spin_lock_init(&adev->mmio_idx_lock);
3756 spin_lock_init(&adev->smc_idx_lock);
3757 spin_lock_init(&adev->pcie_idx_lock);
3758 spin_lock_init(&adev->uvd_ctx_idx_lock);
3759 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3760 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3761 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3762 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3763 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3764
0c4e7fa5
CZ
3765 INIT_LIST_HEAD(&adev->shadow_list);
3766 mutex_init(&adev->shadow_list_lock);
3767
655ce9cb 3768 INIT_LIST_HEAD(&adev->reset_list);
3769
6492e1b0 3770 INIT_LIST_HEAD(&adev->ras_list);
3771
beff74bc
AD
3772 INIT_DELAYED_WORK(&adev->delayed_init_work,
3773 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3774 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3775 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3776
d4535e2c
AG
3777 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3778
d23ee13f 3779 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3780 adev->gfx.gfx_off_residency = 0;
3781 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3782 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3783
b265bdbd
EQ
3784 atomic_set(&adev->throttling_logging_enabled, 1);
3785 /*
3786 * If throttling continues, logging will be performed every minute
3787 * to avoid log flooding. "-1" is subtracted since the thermal
3788 * throttling interrupt comes every second. Thus, the total logging
3789 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3790 * for throttling interrupt) = 60 seconds.
3791 */
3792 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3793 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3794
0fa49558
AX
3795 /* Registers mapping */
3796 /* TODO: block userspace mapping of io register */
da69c161
KW
3797 if (adev->asic_type >= CHIP_BONAIRE) {
3798 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3799 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3800 } else {
3801 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3802 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3803 }
d38ceaf9 3804
6c08e0ef
EQ
3805 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3806 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3807
d38ceaf9
AD
3808 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3809 if (adev->rmmio == NULL) {
3810 return -ENOMEM;
3811 }
3812 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3813 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3814
5494d864
AD
3815 amdgpu_device_get_pcie_info(adev);
3816
b239c017
JX
3817 if (amdgpu_mcbp)
3818 DRM_INFO("MCBP is enabled\n");
3819
436afdfa
PY
3820 /*
 3821 * Reset domain needs to be present early, before any XGMI hive is
 3822 * discovered and initialized, so that the reset semaphore and in_gpu
 3823 * reset flag can be used early on during init and before calling RREG32.
3824 */
3825 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3826 if (!adev->reset_domain)
3827 return -ENOMEM;
3828
3aa0115d
ML
3829 /* detect hw virtualization here */
3830 amdgpu_detect_virtualization(adev);
3831
dffa11b4
ML
3832 r = amdgpu_device_get_job_timeout_settings(adev);
3833 if (r) {
3834 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3835 return r;
a190d1c7
XY
3836 }
3837
d38ceaf9 3838 /* early init functions */
06ec9070 3839 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3840 if (r)
4ef87d8f 3841 return r;
d38ceaf9 3842
b7cdb41e
ML
3843 /* Get rid of things like offb */
3844 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3845 if (r)
3846 return r;
3847
4d33e704
SK
3848 /* Enable TMZ based on IP_VERSION */
3849 amdgpu_gmc_tmz_set(adev);
3850
957b0787 3851 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3852 /* Need to get xgmi info early to decide the reset behavior*/
3853 if (adev->gmc.xgmi.supported) {
3854 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3855 if (r)
3856 return r;
3857 }
3858
8e6d0b69 3859 /* enable PCIE atomic ops */
b4520bfd
GW
3860 if (amdgpu_sriov_vf(adev)) {
3861 if (adev->virt.fw_reserve.p_pf2vf)
3862 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3863 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3864 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
 3865 /* APUs with gfx9 onwards don't rely on PCIe atomics; their
 3866 * internal path natively supports atomics, so set have_atomics_support to true.
3867 */
b4520bfd
GW
3868 } else if ((adev->flags & AMD_IS_APU) &&
3869 (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
0e768043 3870 adev->have_atomics_support = true;
b4520bfd 3871 } else {
8e6d0b69 3872 adev->have_atomics_support =
3873 !pci_enable_atomic_ops_to_root(adev->pdev,
3874 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3875 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
3876 }
3877
8e6d0b69 3878 if (!adev->have_atomics_support)
3879 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3880
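	/*
	 * Note (illustrative): in the bare-metal branch above,
	 * pci_enable_atomic_ops_to_root() returns 0 only when the root port
	 * advertises 32-bit and 64-bit AtomicOp completion and the bridges in
	 * between route AtomicOps, so have_atomics_support ends up true
	 * exactly in that case.
	 */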
6585661d
OZ
3881 /* doorbell bar mapping and doorbell index init*/
3882 amdgpu_device_doorbell_init(adev);
3883
9475a943
SL
3884 if (amdgpu_emu_mode == 1) {
3885 /* post the asic on emulation mode */
3886 emu_soc_asic_init(adev);
bfca0289 3887 goto fence_driver_init;
9475a943 3888 }
bfca0289 3889
04442bf7
LL
3890 amdgpu_reset_init(adev);
3891
4e99a44e 3892 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
3893 if (adev->bios)
3894 amdgpu_device_detect_sriov_bios(adev);
048765ad 3895
95e8e59e
AD
3896 /* check if we need to reset the asic
3897 * E.g., driver was not cleanly unloaded previously, etc.
3898 */
f14899fd 3899 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3900 if (adev->gmc.xgmi.num_physical_nodes) {
3901 dev_info(adev->dev, "Pending hive reset.\n");
3902 adev->gmc.xgmi.pending_reset = true;
3903 /* Only need to init necessary block for SMU to handle the reset */
3904 for (i = 0; i < adev->num_ip_blocks; i++) {
3905 if (!adev->ip_blocks[i].status.valid)
3906 continue;
3907 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3908 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3909 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3910 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3911 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3912 adev->ip_blocks[i].version->funcs->name);
3913 adev->ip_blocks[i].status.hw = true;
3914 }
3915 }
3916 } else {
59e9fff1 3917 tmp = amdgpu_reset_method;
3918 /* It should do a default reset when loading or reloading the driver,
3919 * regardless of the module parameter reset_method.
3920 */
3921 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3922 r = amdgpu_asic_reset(adev);
59e9fff1 3923 amdgpu_reset_method = tmp;
e3c1b071 3924 if (r) {
3925 dev_err(adev->dev, "asic reset on init failed\n");
3926 goto failed;
3927 }
95e8e59e
AD
3928 }
3929 }
3930
d38ceaf9 3931 /* Post card if necessary */
39c640c0 3932 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3933 if (!adev->bios) {
bec86378 3934 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3935 r = -EINVAL;
3936 goto failed;
d38ceaf9 3937 }
bec86378 3938 DRM_INFO("GPU posting now...\n");
4d2997ab 3939 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3940 if (r) {
3941 dev_err(adev->dev, "gpu post error!\n");
3942 goto failed;
3943 }
d38ceaf9
AD
3944 }
3945
88b64e95
AD
3946 if (adev->is_atom_fw) {
3947 /* Initialize clocks */
3948 r = amdgpu_atomfirmware_get_clock_info(adev);
3949 if (r) {
3950 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3951 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3952 goto failed;
3953 }
3954 } else {
a5bde2f9
AD
3955 /* Initialize clocks */
3956 r = amdgpu_atombios_get_clock_info(adev);
3957 if (r) {
3958 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3959 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3960 goto failed;
a5bde2f9
AD
3961 }
3962 /* init i2c buses */
4562236b
HW
3963 if (!amdgpu_device_has_dc_support(adev))
3964 amdgpu_atombios_i2c_init(adev);
2c1a2784 3965 }
d38ceaf9 3966
bfca0289 3967fence_driver_init:
d38ceaf9 3968 /* Fence driver */
067f44c8 3969 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3970 if (r) {
067f44c8 3971 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3972 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3973 goto failed;
2c1a2784 3974 }
d38ceaf9
AD
3975
3976 /* init the mode config */
4a580877 3977 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3978
06ec9070 3979 r = amdgpu_device_ip_init(adev);
d38ceaf9 3980 if (r) {
06ec9070 3981 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3982 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3983 goto release_ras_con;
d38ceaf9
AD
3984 }
3985
8d35a259
LG
3986 amdgpu_fence_driver_hw_init(adev);
3987
d69b8971
YZ
3988 dev_info(adev->dev,
3989 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3990 adev->gfx.config.max_shader_engines,
3991 adev->gfx.config.max_sh_per_se,
3992 adev->gfx.config.max_cu_per_sh,
3993 adev->gfx.cu_info.number);
3994
d38ceaf9
AD
3995 adev->accel_working = true;
3996
e59c0205
AX
3997 amdgpu_vm_check_compute_bug(adev);
3998
95844d20
MO
3999 /* Initialize the buffer migration limit. */
4000 if (amdgpu_moverate >= 0)
4001 max_MBps = amdgpu_moverate;
4002 else
4003 max_MBps = 8; /* Allow 8 MB/s. */
4004 /* Get a log2 for easy divisions. */
4005 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4006
d2f52ac8 4007 r = amdgpu_pm_sysfs_init(adev);
53e9d836
GC
4008 if (r)
4009 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
d2f52ac8 4010
5bb23532 4011 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
4012 if (r) {
4013 adev->ucode_sysfs_en = false;
5bb23532 4014 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
4015 } else
4016 adev->ucode_sysfs_en = true;
5bb23532 4017
8424f2cc
LG
4018 r = amdgpu_psp_sysfs_init(adev);
4019 if (r) {
4020 adev->psp_sysfs_en = false;
4021 if (!amdgpu_sriov_vf(adev))
4022 DRM_ERROR("Creating psp sysfs failed\n");
4023 } else
4024 adev->psp_sysfs_en = true;
4025
b0adca4d
EQ
4026 /*
4027 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 4028 * Otherwise the mgpu fan boost feature will be skipped because the
 4029 * gpu instance count would be too low.
4030 */
4031 amdgpu_register_gpu_instance(adev);
4032
d38ceaf9
AD
 4033 /* enable clockgating, etc. after ib tests, since some blocks require
4034 * explicit gating rather than handling it automatically.
4035 */
e3c1b071 4036 if (!adev->gmc.xgmi.pending_reset) {
4037 r = amdgpu_device_ip_late_init(adev);
4038 if (r) {
4039 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4040 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 4041 goto release_ras_con;
e3c1b071 4042 }
4043 /* must succeed. */
4044 amdgpu_ras_resume(adev);
4045 queue_delayed_work(system_wq, &adev->delayed_init_work,
4046 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 4047 }
d38ceaf9 4048
38eecbe0
CL
4049 if (amdgpu_sriov_vf(adev)) {
4050 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4051 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4052 }
2c738637 4053
77f3a5cd 4054 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4055 if (r)
77f3a5cd 4056 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4057
d155bef0
AB
4058 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4059 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4060 if (r)
4061 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4062
c1dd4aa6
AG
4063 /* Have stored pci confspace at hand for restore in sudden PCI error */
4064 if (amdgpu_device_cache_pci_state(adev->pdev))
4065 pci_restore_state(pdev);
4066
8c3dd61c
KHF
 4067 /* if we have more than one VGA card, then disable the amdgpu VGA resources */
4068 /* this will fail for cards that aren't VGA class devices, just
4069 * ignore it */
4070 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4071 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4072
d37a3929
OC
4073 px = amdgpu_device_supports_px(ddev);
4074
4075 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4076 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4077 vga_switcheroo_register_client(adev->pdev,
4078 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4079
4080 if (px)
8c3dd61c 4081 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4082
e3c1b071 4083 if (adev->gmc.xgmi.pending_reset)
4084 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4085 msecs_to_jiffies(AMDGPU_RESUME_MS));
4086
4a74c38c
PY
4087 amdgpu_device_check_iommu_direct_map(adev);
4088
d38ceaf9 4089 return 0;
83ba126a 4090
970fd197 4091release_ras_con:
38eecbe0
CL
4092 if (amdgpu_sriov_vf(adev))
4093 amdgpu_virt_release_full_gpu(adev, true);
4094
4095 /* failed in exclusive mode due to timeout */
4096 if (amdgpu_sriov_vf(adev) &&
4097 !amdgpu_sriov_runtime(adev) &&
4098 amdgpu_virt_mmio_blocked(adev) &&
4099 !amdgpu_virt_wait_reset(adev)) {
4100 dev_err(adev->dev, "VF exclusive mode timeout\n");
4101 /* Don't send request since VF is inactive. */
4102 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4103 adev->virt.ops = NULL;
4104 r = -EAGAIN;
4105 }
970fd197
SY
4106 amdgpu_release_ras_context(adev);
4107
83ba126a 4108failed:
89041940 4109 amdgpu_vf_error_trans_all(adev);
8840a387 4110
83ba126a 4111 return r;
d38ceaf9
AD
4112}
4113
07775fc1
AG
4114static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4115{
62d5f9f7 4116
07775fc1
AG
4117 /* Clear all CPU mappings pointing to this device */
4118 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4119
4120 /* Unmap all mapped bars - Doorbell, registers and VRAM */
4121 amdgpu_device_doorbell_fini(adev);
4122
4123 iounmap(adev->rmmio);
4124 adev->rmmio = NULL;
4125 if (adev->mman.aper_base_kaddr)
4126 iounmap(adev->mman.aper_base_kaddr);
4127 adev->mman.aper_base_kaddr = NULL;
4128
4129 /* Memory manager related */
a0ba1279 4130 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4131 arch_phys_wc_del(adev->gmc.vram_mtrr);
4132 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4133 }
4134}
4135
d38ceaf9 4136/**
bbe04dec 4137 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4138 *
4139 * @adev: amdgpu_device pointer
4140 *
4141 * Tear down the driver info (all asics).
4142 * Called at driver shutdown.
4143 */
72c8c97b 4144void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4145{
aac89168 4146 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4147 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4148 adev->shutdown = true;
9f875167 4149
752c683d
ML
 4150 /* make sure IB tests have finished before entering exclusive mode
 4151 * to avoid preemption during an IB test
 4152 */
519b8b76 4153 if (amdgpu_sriov_vf(adev)) {
752c683d 4154 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4155 amdgpu_virt_fini_data_exchange(adev);
4156 }
752c683d 4157
e5b03032
ML
4158 /* disable all interrupts */
4159 amdgpu_irq_disable_all(adev);
47fc644f 4160 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4161 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4162 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4163 else
4a580877 4164 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4165 }
8d35a259 4166 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4167
cd3a8a59 4168 if (adev->mman.initialized)
9bff18d1 4169 drain_workqueue(adev->mman.bdev.wq);
98f56188 4170
53e9d836 4171 if (adev->pm.sysfs_initialized)
7c868b59 4172 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4173 if (adev->ucode_sysfs_en)
4174 amdgpu_ucode_sysfs_fini(adev);
8424f2cc
LG
4175 if (adev->psp_sysfs_en)
4176 amdgpu_psp_sysfs_fini(adev);
72c8c97b
AG
4177 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4178
232d1d43
SY
4179 /* disable ras feature must before hw fini */
4180 amdgpu_ras_pre_fini(adev);
4181
e9669fb7 4182 amdgpu_device_ip_fini_early(adev);
d10d0daa 4183
a3848df6
YW
4184 amdgpu_irq_fini_hw(adev);
4185
b6fd6e0f
SK
4186 if (adev->mman.initialized)
4187 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4188
d10d0daa 4189 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4190
39934d3e
VP
4191 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4192 amdgpu_device_unmap_mmio(adev);
87172e89 4193
72c8c97b
AG
4194}
4195
4196void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4197{
62d5f9f7 4198 int idx;
d37a3929 4199 bool px;
62d5f9f7 4200
8d35a259 4201 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4202 amdgpu_device_ip_fini(adev);
b31d3063 4203 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4204 adev->accel_working = false;
68ce8b24 4205 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4206
4207 amdgpu_reset_fini(adev);
4208
d38ceaf9 4209 /* free i2c buses */
4562236b
HW
4210 if (!amdgpu_device_has_dc_support(adev))
4211 amdgpu_i2c_fini(adev);
bfca0289
SL
4212
4213 if (amdgpu_emu_mode != 1)
4214 amdgpu_atombios_fini(adev);
4215
d38ceaf9
AD
4216 kfree(adev->bios);
4217 adev->bios = NULL;
d37a3929
OC
4218
4219 px = amdgpu_device_supports_px(adev_to_drm(adev));
4220
4221 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4222 apple_gmux_detect(NULL, NULL)))
84c8b22e 4223 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4224
4225 if (px)
83ba126a 4226 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4227
38d6be81 4228 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4229 vga_client_unregister(adev->pdev);
e9bc1bf7 4230
62d5f9f7
LS
4231 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4232
4233 iounmap(adev->rmmio);
4234 adev->rmmio = NULL;
4235 amdgpu_device_doorbell_fini(adev);
4236 drm_dev_exit(idx);
4237 }
4238
d155bef0
AB
4239 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4240 amdgpu_pmu_fini(adev);
72de33f8 4241 if (adev->mman.discovery_bin)
a190d1c7 4242 amdgpu_discovery_fini(adev);
72c8c97b 4243
cfbb6b00
AG
4244 amdgpu_reset_put_reset_domain(adev->reset_domain);
4245 adev->reset_domain = NULL;
4246
72c8c97b
AG
4247 kfree(adev->pci_state);
4248
d38ceaf9
AD
4249}
4250
58144d28
ND
4251/**
4252 * amdgpu_device_evict_resources - evict device resources
4253 * @adev: amdgpu device object
4254 *
 4255 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4256 * of the vram memory type. Mainly used for evicting device resources
4257 * at suspend time.
4258 *
4259 */
7863c155 4260static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4261{
7863c155
ML
4262 int ret;
4263
e53d9665
ML
4264 /* No need to evict vram on APUs for suspend to ram or s2idle */
4265 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4266 return 0;
58144d28 4267
7863c155
ML
4268 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4269 if (ret)
58144d28 4270 DRM_WARN("evicting device resources failed\n");
7863c155 4271 return ret;
58144d28 4272}
d38ceaf9
AD
4273
4274/*
4275 * Suspend & resume.
4276 */
4277/**
810ddc3a 4278 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4279 *
87e3f136 4280 * @dev: drm dev pointer
87e3f136 4281 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4282 *
4283 * Puts the hw in the suspend state (all asics).
4284 * Returns 0 for success or an error on failure.
4285 * Called at driver suspend.
4286 */
de185019 4287int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4288{
a2e15b0e 4289 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4290 int r = 0;
d38ceaf9 4291
d38ceaf9
AD
4292 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4293 return 0;
4294
44779b43 4295 adev->in_suspend = true;
3fa8f89d 4296
47ea2076
SF
4297 /* Evict the majority of BOs before grabbing the full access */
4298 r = amdgpu_device_evict_resources(adev);
4299 if (r)
4300 return r;
4301
d7274ec7
BZ
4302 if (amdgpu_sriov_vf(adev)) {
4303 amdgpu_virt_fini_data_exchange(adev);
4304 r = amdgpu_virt_request_full_gpu(adev, false);
4305 if (r)
4306 return r;
4307 }
4308
3fa8f89d
S
4309 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4310 DRM_WARN("smart shift update failed\n");
4311
5f818173 4312 if (fbcon)
087451f3 4313 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4314
beff74bc 4315 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 4316
5e6932fe 4317 amdgpu_ras_suspend(adev);
4318
2196927b 4319 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4320
c004d44e 4321 if (!adev->in_s0ix)
5d3a2d95 4322 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4323
7863c155
ML
4324 r = amdgpu_device_evict_resources(adev);
4325 if (r)
4326 return r;
d38ceaf9 4327
8d35a259 4328 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4329
2196927b 4330 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4331
d7274ec7
BZ
4332 if (amdgpu_sriov_vf(adev))
4333 amdgpu_virt_release_full_gpu(adev, false);
4334
d38ceaf9
AD
4335 return 0;
4336}
4337
4338/**
810ddc3a 4339 * amdgpu_device_resume - initiate device resume
d38ceaf9 4340 *
87e3f136 4341 * @dev: drm dev pointer
87e3f136 4342 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4343 *
4344 * Bring the hw back to operating state (all asics).
4345 * Returns 0 for success or an error on failure.
4346 * Called at driver resume.
4347 */
de185019 4348int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4349{
1348969a 4350 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4351 int r = 0;
d38ceaf9 4352
d7274ec7
BZ
4353 if (amdgpu_sriov_vf(adev)) {
4354 r = amdgpu_virt_request_full_gpu(adev, true);
4355 if (r)
4356 return r;
4357 }
4358
d38ceaf9
AD
4359 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4360 return 0;
4361
62498733 4362 if (adev->in_s0ix)
bc143d8b 4363 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4364
d38ceaf9 4365 /* post card */
39c640c0 4366 if (amdgpu_device_need_post(adev)) {
4d2997ab 4367 r = amdgpu_device_asic_init(adev);
74b0b157 4368 if (r)
aac89168 4369 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4370 }
d38ceaf9 4371
06ec9070 4372 r = amdgpu_device_ip_resume(adev);
d7274ec7 4373
e6707218 4374 if (r) {
aac89168 4375 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4376 goto exit;
e6707218 4377 }
8d35a259 4378 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4379
06ec9070 4380 r = amdgpu_device_ip_late_init(adev);
03161a6e 4381 if (r)
3c22c1ea 4382 goto exit;
d38ceaf9 4383
beff74bc
AD
4384 queue_delayed_work(system_wq, &adev->delayed_init_work,
4385 msecs_to_jiffies(AMDGPU_RESUME_MS));
4386
c004d44e 4387 if (!adev->in_s0ix) {
5d3a2d95
AD
4388 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4389 if (r)
3c22c1ea 4390 goto exit;
5d3a2d95 4391 }
756e6880 4392
3c22c1ea
SF
4393exit:
4394 if (amdgpu_sriov_vf(adev)) {
4395 amdgpu_virt_init_data_exchange(adev);
4396 amdgpu_virt_release_full_gpu(adev, true);
4397 }
4398
4399 if (r)
4400 return r;
4401
96a5d8d4 4402 /* Make sure IB tests flushed */
beff74bc 4403 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4404
a2e15b0e 4405 if (fbcon)
087451f3 4406 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4407
5e6932fe 4408 amdgpu_ras_resume(adev);
4409
d09ef243
AD
4410 if (adev->mode_info.num_crtc) {
4411 /*
4412 * Most of the connector probing functions try to acquire runtime pm
4413 * refs to ensure that the GPU is powered on when connector polling is
4414 * performed. Since we're calling this from a runtime PM callback,
4415 * trying to acquire rpm refs will cause us to deadlock.
4416 *
4417 * Since we're guaranteed to be holding the rpm lock, it's safe to
4418 * temporarily disable the rpm helpers so this doesn't deadlock us.
4419 */
23a1a9e5 4420#ifdef CONFIG_PM
d09ef243 4421 dev->dev->power.disable_depth++;
23a1a9e5 4422#endif
d09ef243
AD
4423 if (!adev->dc_enabled)
4424 drm_helper_hpd_irq_event(dev);
4425 else
4426 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4427#ifdef CONFIG_PM
d09ef243 4428 dev->dev->power.disable_depth--;
23a1a9e5 4429#endif
d09ef243 4430 }
44779b43
RZ
4431 adev->in_suspend = false;
4432
dc907c9d
JX
4433 if (adev->enable_mes)
4434 amdgpu_mes_self_test(adev);
4435
3fa8f89d
S
4436 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4437 DRM_WARN("smart shift update failed\n");
4438
4d3b9ae5 4439 return 0;
d38ceaf9
AD
4440}
4441
e3ecdffa
AD
4442/**
4443 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4444 *
4445 * @adev: amdgpu_device pointer
4446 *
4447 * The list of all the hardware IPs that make up the asic is walked and
4448 * the check_soft_reset callbacks are run. check_soft_reset determines
4449 * if the asic is still hung or not.
4450 * Returns true if any of the IPs are still in a hung state, false if not.
4451 */
06ec9070 4452static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4453{
4454 int i;
4455 bool asic_hang = false;
4456
f993d628
ML
4457 if (amdgpu_sriov_vf(adev))
4458 return true;
4459
8bc04c29
AD
4460 if (amdgpu_asic_need_full_reset(adev))
4461 return true;
4462
63fbf42f 4463 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4464 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4465 continue;
a1255107
AD
4466 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4467 adev->ip_blocks[i].status.hang =
4468 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4469 if (adev->ip_blocks[i].status.hang) {
aac89168 4470 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4471 asic_hang = true;
4472 }
4473 }
4474 return asic_hang;
4475}
4476
e3ecdffa
AD
4477/**
4478 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4479 *
4480 * @adev: amdgpu_device pointer
4481 *
4482 * The list of all the hardware IPs that make up the asic is walked and the
4483 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4484 * handles any IP specific hardware or software state changes that are
4485 * necessary for a soft reset to succeed.
4486 * Returns 0 on success, negative error code on failure.
4487 */
06ec9070 4488static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4489{
4490 int i, r = 0;
4491
4492 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4493 if (!adev->ip_blocks[i].status.valid)
d31a501e 4494 continue;
a1255107
AD
4495 if (adev->ip_blocks[i].status.hang &&
4496 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4497 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4498 if (r)
4499 return r;
4500 }
4501 }
4502
4503 return 0;
4504}
4505
e3ecdffa
AD
4506/**
4507 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4508 *
4509 * @adev: amdgpu_device pointer
4510 *
4511 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4512 * reset is necessary to recover.
4513 * Returns true if a full asic reset is required, false if not.
4514 */
06ec9070 4515static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4516{
da146d3b
AD
4517 int i;
4518
8bc04c29
AD
4519 if (amdgpu_asic_need_full_reset(adev))
4520 return true;
4521
da146d3b 4522 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4523 if (!adev->ip_blocks[i].status.valid)
da146d3b 4524 continue;
a1255107
AD
4525 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4526 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4527 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4528 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4529 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4530 if (adev->ip_blocks[i].status.hang) {
aac89168 4531 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4532 return true;
4533 }
4534 }
35d782fe
CZ
4535 }
4536 return false;
4537}
4538
e3ecdffa
AD
4539/**
4540 * amdgpu_device_ip_soft_reset - do a soft reset
4541 *
4542 * @adev: amdgpu_device pointer
4543 *
4544 * The list of all the hardware IPs that make up the asic is walked and the
4545 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4546 * IP specific hardware or software state changes that are necessary to soft
4547 * reset the IP.
4548 * Returns 0 on success, negative error code on failure.
4549 */
06ec9070 4550static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4551{
4552 int i, r = 0;
4553
4554 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4555 if (!adev->ip_blocks[i].status.valid)
35d782fe 4556 continue;
a1255107
AD
4557 if (adev->ip_blocks[i].status.hang &&
4558 adev->ip_blocks[i].version->funcs->soft_reset) {
4559 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4560 if (r)
4561 return r;
4562 }
4563 }
4564
4565 return 0;
4566}
4567
e3ecdffa
AD
4568/**
4569 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4570 *
4571 * @adev: amdgpu_device pointer
4572 *
4573 * The list of all the hardware IPs that make up the asic is walked and the
4574 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4575 * handles any IP specific hardware or software state changes that are
4576 * necessary after the IP has been soft reset.
4577 * Returns 0 on success, negative error code on failure.
4578 */
06ec9070 4579static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4580{
4581 int i, r = 0;
4582
4583 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4584 if (!adev->ip_blocks[i].status.valid)
35d782fe 4585 continue;
a1255107
AD
4586 if (adev->ip_blocks[i].status.hang &&
4587 adev->ip_blocks[i].version->funcs->post_soft_reset)
4588 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4589 if (r)
4590 return r;
4591 }
4592
4593 return 0;
4594}
4595
e3ecdffa 4596/**
c33adbc7 4597 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4598 *
4599 * @adev: amdgpu_device pointer
4600 *
4601 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4602 * restore things like GPUVM page tables after a GPU reset where
4603 * the contents of VRAM might be lost.
403009bf
CK
4604 *
4605 * Returns:
4606 * 0 on success, negative error code on failure.
e3ecdffa 4607 */
c33adbc7 4608static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4609{
c41d1cf6 4610 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4611 struct amdgpu_bo *shadow;
e18aaea7 4612 struct amdgpu_bo_vm *vmbo;
403009bf 4613 long r = 1, tmo;
c41d1cf6
ML
4614
4615 if (amdgpu_sriov_runtime(adev))
b045d3af 4616 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4617 else
4618 tmo = msecs_to_jiffies(100);
4619
aac89168 4620 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4621 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4622 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4623 /* If vm is compute context or adev is APU, shadow will be NULL */
4624 if (!vmbo->shadow)
4625 continue;
4626 shadow = vmbo->shadow;
4627
403009bf 4628 /* No need to recover an evicted BO */
d3116756
CK
4629 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4630 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4631 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4632 continue;
4633
4634 r = amdgpu_bo_restore_shadow(shadow, &next);
4635 if (r)
4636 break;
4637
c41d1cf6 4638 if (fence) {
1712fb1a 4639 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4640 dma_fence_put(fence);
4641 fence = next;
1712fb1a 4642 if (tmo == 0) {
4643 r = -ETIMEDOUT;
c41d1cf6 4644 break;
1712fb1a 4645 } else if (tmo < 0) {
4646 r = tmo;
4647 break;
4648 }
403009bf
CK
4649 } else {
4650 fence = next;
c41d1cf6 4651 }
c41d1cf6
ML
4652 }
4653 mutex_unlock(&adev->shadow_list_lock);
4654
403009bf
CK
4655 if (fence)
4656 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4657 dma_fence_put(fence);
4658
1712fb1a 4659 if (r < 0 || tmo <= 0) {
aac89168 4660 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4661 return -EIO;
4662 }
c41d1cf6 4663
aac89168 4664 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4665 return 0;
c41d1cf6
ML
4666}
4667
a90ad3c2 4668
e3ecdffa 4669/**
06ec9070 4670 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4671 *
982a820b 4672 * @adev: amdgpu_device pointer
87e3f136 4673 * @from_hypervisor: request from hypervisor
5740682e
ML
4674 *
 4675 * Do VF FLR and reinitialize the ASIC.
 3f48c681 4676 * Returns 0 on success, otherwise an error code.
e3ecdffa
AD
4677 */
4678static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4679 bool from_hypervisor)
5740682e
ML
4680{
4681 int r;
a5f67c93 4682 struct amdgpu_hive_info *hive = NULL;
7258fa31 4683 int retry_limit = 0;
5740682e 4684
7258fa31 4685retry:
c004d44e 4686 amdgpu_amdkfd_pre_reset(adev);
428890a3 4687
5740682e
ML
4688 if (from_hypervisor)
4689 r = amdgpu_virt_request_full_gpu(adev, true);
4690 else
4691 r = amdgpu_virt_reset_gpu(adev);
4692 if (r)
4693 return r;
a90ad3c2
ML
4694
4695 /* Resume IP prior to SMC */
06ec9070 4696 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4697 if (r)
4698 goto error;
a90ad3c2 4699
c9ffa427 4700 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4701
7a3e0bb2
RZ
4702 r = amdgpu_device_fw_loading(adev);
4703 if (r)
4704 return r;
4705
a90ad3c2 4706 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4707 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4708 if (r)
4709 goto error;
a90ad3c2 4710
a5f67c93
ZL
4711 hive = amdgpu_get_xgmi_hive(adev);
4712 /* Update PSP FW topology after reset */
4713 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4714 r = amdgpu_xgmi_update_topology(hive, adev);
4715
4716 if (hive)
4717 amdgpu_put_xgmi_hive(hive);
4718
4719 if (!r) {
4720 amdgpu_irq_gpu_reset_resume_helper(adev);
4721 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4722
c004d44e 4723 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4724 }
a90ad3c2 4725
abc34253 4726error:
c41d1cf6 4727 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4728 amdgpu_inc_vram_lost(adev);
c33adbc7 4729 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4730 }
437f3e0b 4731 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4732
7258fa31
SK
4733 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4734 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4735 retry_limit++;
4736 goto retry;
4737 } else
4738 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4739 }
4740
a90ad3c2
ML
4741 return r;
4742}
4743
9a1cddd6 4744/**
 4745 * amdgpu_device_has_job_running - check if there is any job in the pending list
4746 *
982a820b 4747 * @adev: amdgpu_device pointer
9a1cddd6 4748 *
 4749 * check if there is any job in the scheduler pending list
4750 */
4751bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4752{
4753 int i;
4754 struct drm_sched_job *job;
4755
4756 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4757 struct amdgpu_ring *ring = adev->rings[i];
4758
4759 if (!ring || !ring->sched.thread)
4760 continue;
4761
4762 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4763 job = list_first_entry_or_null(&ring->sched.pending_list,
4764 struct drm_sched_job, list);
9a1cddd6 4765 spin_unlock(&ring->sched.job_list_lock);
4766 if (job)
4767 return true;
4768 }
4769 return false;
4770}
4771
12938fad
CK
4772/**
4773 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4774 *
982a820b 4775 * @adev: amdgpu_device pointer
12938fad
CK
4776 *
4777 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4778 * a hung GPU.
4779 */
4780bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4781{
12938fad 4782
3ba7b418
AG
4783 if (amdgpu_gpu_recovery == 0)
4784 goto disabled;
4785
1a11a65d
YC
4786 /* Skip soft reset check in fatal error mode */
4787 if (!amdgpu_ras_is_poison_mode_supported(adev))
4788 return true;
4789
3ba7b418
AG
4790 if (amdgpu_sriov_vf(adev))
4791 return true;
4792
4793 if (amdgpu_gpu_recovery == -1) {
4794 switch (adev->asic_type) {
b3523c45
AD
4795#ifdef CONFIG_DRM_AMDGPU_SI
4796 case CHIP_VERDE:
4797 case CHIP_TAHITI:
4798 case CHIP_PITCAIRN:
4799 case CHIP_OLAND:
4800 case CHIP_HAINAN:
4801#endif
4802#ifdef CONFIG_DRM_AMDGPU_CIK
4803 case CHIP_KAVERI:
4804 case CHIP_KABINI:
4805 case CHIP_MULLINS:
4806#endif
4807 case CHIP_CARRIZO:
4808 case CHIP_STONEY:
4809 case CHIP_CYAN_SKILLFISH:
3ba7b418 4810 goto disabled;
b3523c45
AD
4811 default:
4812 break;
3ba7b418 4813 }
12938fad
CK
4814 }
4815
4816 return true;
3ba7b418
AG
4817
4818disabled:
aac89168 4819 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4820 return false;
12938fad
CK
4821}
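/*
 * Illustrative summary (option name assumed from the amdgpu_gpu_recovery
 * variable above): loading the driver with amdgpu.gpu_recovery=0 disables
 * GPU recovery entirely, =1 forces it on, and the default of -1 lets the
 * checks above decide per ASIC (always on for SR-IOV, off for the listed
 * SI/CIK and small APU parts).
 */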
4822
5c03e584
FX
4823int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4824{
47fc644f
SS
4825 u32 i;
4826 int ret = 0;
5c03e584 4827
47fc644f 4828 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4829
47fc644f 4830 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4831
47fc644f
SS
4832 /* disable BM */
4833 pci_clear_master(adev->pdev);
5c03e584 4834
47fc644f 4835 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4836
47fc644f
SS
4837 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4838 dev_info(adev->dev, "GPU smu mode1 reset\n");
4839 ret = amdgpu_dpm_mode1_reset(adev);
4840 } else {
4841 dev_info(adev->dev, "GPU psp mode1 reset\n");
4842 ret = psp_gpu_reset(adev);
4843 }
5c03e584 4844
47fc644f
SS
4845 if (ret)
4846 dev_err(adev->dev, "GPU mode1 reset failed\n");
5c03e584 4847
47fc644f 4848 amdgpu_device_load_pci_state(adev->pdev);
5c03e584 4849
47fc644f
SS
4850 /* wait for asic to come out of reset */
4851 for (i = 0; i < adev->usec_timeout; i++) {
4852 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4853
47fc644f
SS
4854 if (memsize != 0xffffffff)
4855 break;
4856 udelay(1);
4857 }
5c03e584 4858
47fc644f
SS
4859 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4860 return ret;
5c03e584 4861}
5c6dd71e 4862
e3c1b071 4863int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4864 struct amdgpu_reset_context *reset_context)
26bc5340 4865{
5c1e6fa4 4866 int i, r = 0;
04442bf7
LL
4867 struct amdgpu_job *job = NULL;
4868 bool need_full_reset =
4869 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4870
4871 if (reset_context->reset_req_dev == adev)
4872 job = reset_context->job;
71182665 4873
b602ca5f
TZ
4874 if (amdgpu_sriov_vf(adev)) {
4875 /* stop the data exchange thread */
4876 amdgpu_virt_fini_data_exchange(adev);
4877 }
4878
9e225fb9
AG
4879 amdgpu_fence_driver_isr_toggle(adev, true);
4880
71182665 4881 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4882 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4883 struct amdgpu_ring *ring = adev->rings[i];
4884
51687759 4885 if (!ring || !ring->sched.thread)
0875dc9e 4886 continue;
5740682e 4887
c530b02f
JZ
 4888 /* clear job fences from fence drv to avoid force_completion
 4889 * leaving NULL and vm flush fences in fence drv */
5c1e6fa4 4890 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4891
2f9d4084
ML
4892 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4893 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4894 }
d38ceaf9 4895
9e225fb9
AG
4896 amdgpu_fence_driver_isr_toggle(adev, false);
4897
ff99849b 4898 if (job && job->vm)
222b5f04
AG
4899 drm_sched_increase_karma(&job->base);
4900
04442bf7 4901 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b
LL
4902 /* If reset handler not implemented, continue; otherwise return */
4903 if (r == -ENOSYS)
4904 r = 0;
4905 else
04442bf7
LL
4906 return r;
4907
1d721ed6 4908 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4909 if (!amdgpu_sriov_vf(adev)) {
4910
4911 if (!need_full_reset)
4912 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4913
360cd081
LG
4914 if (!need_full_reset && amdgpu_gpu_recovery &&
4915 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4916 amdgpu_device_ip_pre_soft_reset(adev);
4917 r = amdgpu_device_ip_soft_reset(adev);
4918 amdgpu_device_ip_post_soft_reset(adev);
4919 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4920 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4921 need_full_reset = true;
4922 }
4923 }
4924
4925 if (need_full_reset)
4926 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4927 if (need_full_reset)
4928 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4929 else
4930 clear_bit(AMDGPU_NEED_FULL_RESET,
4931 &reset_context->flags);
26bc5340
AG
4932 }
4933
4934 return r;
4935}
4936
15fd09a0
SA
4937static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4938{
15fd09a0
SA
4939 int i;
4940
38a15ad9 4941 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4942
4943 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4944 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4945 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4946 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4947 }
4948
4949 return 0;
4950}
4951
3d8785f6
SA
4952#ifdef CONFIG_DEV_COREDUMP
4953static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4954 size_t count, void *data, size_t datalen)
4955{
4956 struct drm_printer p;
4957 struct amdgpu_device *adev = data;
4958 struct drm_print_iterator iter;
4959 int i;
4960
4961 iter.data = buffer;
4962 iter.offset = 0;
4963 iter.start = offset;
4964 iter.remain = count;
4965
4966 p = drm_coredump_printer(&iter);
4967
4968 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4969 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4970 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4971 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4972 if (adev->reset_task_info.pid)
4973 drm_printf(&p, "process_name: %s PID: %d\n",
4974 adev->reset_task_info.process_name,
4975 adev->reset_task_info.pid);
4976
4977 if (adev->reset_vram_lost)
4978 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4979 if (adev->num_regs) {
4980 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4981
4982 for (i = 0; i < adev->num_regs; i++)
4983 drm_printf(&p, "0x%08x: 0x%08x\n",
4984 adev->reset_dump_reg_list[i],
4985 adev->reset_dump_reg_value[i]);
4986 }
4987
4988 return count - iter.remain;
4989}
4990
4991static void amdgpu_devcoredump_free(void *data)
4992{
4993}
4994
4995static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4996{
4997 struct drm_device *dev = adev_to_drm(adev);
4998
4999 ktime_get_ts64(&adev->reset_time);
5000 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
5001 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
5002}
5003#endif
5004
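/* Perform the actual ASIC reset for every device on @device_list_handle.
 * A vendor-specific reset handler is tried first via
 * amdgpu_reset_perform_reset(); if none is implemented (-ENOSYS), the default
 * path is used: a full reset of each node (queued in parallel for XGMI
 * hives), followed by re-posting the ASIC and resuming the IP blocks.
 * Returns -EAGAIN when the IB ring tests fail so the caller can retry with a
 * full reset.
 */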
04442bf7
LL
5005int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5006 struct amdgpu_reset_context *reset_context)
26bc5340
AG
5007{
5008 struct amdgpu_device *tmp_adev = NULL;
04442bf7 5009 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 5010 int r = 0;
f5c7e779 5011 bool gpu_reset_for_dev_remove = 0;
26bc5340 5012
04442bf7
LL
5013 /* Try reset handler method first */
5014 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5015 reset_list);
15fd09a0 5016 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
5017
5018 reset_context->reset_device_list = device_list_handle;
04442bf7 5019 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b
LL
5020 /* If reset handler not implemented, continue; otherwise return */
5021 if (r == -ENOSYS)
5022 r = 0;
5023 else
04442bf7
LL
5024 return r;
5025
5026 /* Reset handler not implemented, use the default method */
5027 need_full_reset =
5028 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5029 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5030
f5c7e779
YC
5031 gpu_reset_for_dev_remove =
5032 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5033 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5034
26bc5340 5035 /*
655ce9cb 5036 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
5037 * to allow proper link negotiation in FW (within 1 sec)
5038 */
7ac71382 5039 if (!skip_hw_reset && need_full_reset) {
655ce9cb 5040 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 5041 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 5042 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 5043 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 5044 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
5045 r = -EALREADY;
5046 } else
5047 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 5048
041a62bc 5049 if (r) {
aac89168 5050 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 5051 r, adev_to_drm(tmp_adev)->unique);
041a62bc 5052 break;
ce316fa5
LM
5053 }
5054 }
5055
041a62bc
AG
5056 /* For XGMI wait for all resets to complete before proceeding */
5057 if (!r) {
655ce9cb 5058 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
5059 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5060 flush_work(&tmp_adev->xgmi_reset_work);
5061 r = tmp_adev->asic_reset_res;
5062 if (r)
5063 break;
ce316fa5
LM
5064 }
5065 }
5066 }
ce316fa5 5067 }
26bc5340 5068
43c4d576 5069 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 5070 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 5071 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
5072 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
5073 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
5074 }
5075
00eaa571 5076 amdgpu_ras_intr_cleared();
43c4d576 5077 }
00eaa571 5078
f5c7e779
YC
5079 /* Since the mode1 reset affects base ip blocks, the
5080 * phase1 ip blocks need to be resumed. Otherwise there
5081 * will be a BIOS signature error and the psp bootloader
5082 * can't load kdb on the next amdgpu install.
5083 */
5084 if (gpu_reset_for_dev_remove) {
5085 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5086 amdgpu_device_ip_resume_phase1(tmp_adev);
5087
5088 goto end;
5089 }
5090
655ce9cb 5091 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
5092 if (need_full_reset) {
5093 /* post card */
e3c1b071 5094 r = amdgpu_device_asic_init(tmp_adev);
5095 if (r) {
aac89168 5096 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5097 } else {
26bc5340 5098 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1
JZ
5099 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
5100 if (r)
5101 goto out;
5102
26bc5340
AG
5103 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5104 if (r)
5105 goto out;
5106
5107 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
5108#ifdef CONFIG_DEV_COREDUMP
5109 tmp_adev->reset_vram_lost = vram_lost;
5110 memset(&tmp_adev->reset_task_info, 0,
5111 sizeof(tmp_adev->reset_task_info));
5112 if (reset_context->job && reset_context->job->vm)
5113 tmp_adev->reset_task_info =
5114 reset_context->job->vm->task_info;
5115 amdgpu_reset_capture_coredumpm(tmp_adev);
5116#endif
26bc5340 5117 if (vram_lost) {
77e7f829 5118 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5119 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5120 }
5121
26bc5340
AG
5122 r = amdgpu_device_fw_loading(tmp_adev);
5123 if (r)
5124 return r;
5125
5126 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5127 if (r)
5128 goto out;
5129
5130 if (vram_lost)
5131 amdgpu_device_fill_reset_magic(tmp_adev);
5132
fdafb359
EQ
5133 /*
5134 * Add this ASIC back as tracked since the reset has
5135 * already completed successfully.
5136 */
5137 amdgpu_register_gpu_instance(tmp_adev);
5138
04442bf7
LL
5139 if (!reset_context->hive &&
5140 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5141 amdgpu_xgmi_add_device(tmp_adev);
5142
7c04ca50 5143 r = amdgpu_device_ip_late_init(tmp_adev);
5144 if (r)
5145 goto out;
5146
087451f3 5147 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5148
e8fbaf03
GC
5149 /*
5150 * The GPU enters a bad state once the number of
5151 * faulty pages detected by ECC has reached the
5152 * threshold, and RAS recovery is scheduled next. So
5153 * add a check here to break recovery if the count
5154 * indeed exceeds the bad page threshold, and remind
5155 * the user to retire this GPU or set a bigger
5156 * bad_page_threshold value to fix this the next time
5157 * the driver is probed.
5158 */
11003c68 5159 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5160 /* must succeed. */
5161 amdgpu_ras_resume(tmp_adev);
5162 } else {
5163 r = -EINVAL;
5164 goto out;
5165 }
e79a04d5 5166
26bc5340 5167 /* Update PSP FW topology after reset */
04442bf7
LL
5168 if (reset_context->hive &&
5169 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5170 r = amdgpu_xgmi_update_topology(
5171 reset_context->hive, tmp_adev);
26bc5340
AG
5172 }
5173 }
5174
26bc5340
AG
5175out:
5176 if (!r) {
5177 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5178 r = amdgpu_ib_ring_tests(tmp_adev);
5179 if (r) {
5180 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5181 need_full_reset = true;
5182 r = -EAGAIN;
5183 goto end;
5184 }
5185 }
5186
5187 if (!r)
5188 r = amdgpu_device_recover_vram(tmp_adev);
5189 else
5190 tmp_adev->asic_reset_res = r;
5191 }
5192
5193end:
04442bf7
LL
5194 if (need_full_reset)
5195 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5196 else
5197 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5198 return r;
5199}
5200
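/* Advertise to the PM code which MP1 (SMU) state matches the chosen reset
 * method: SHUTDOWN for mode1, RESET for mode2, NONE otherwise.
 */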
e923be99 5201static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5202{
5740682e 5203
a3a09142
AD
5204 switch (amdgpu_asic_reset_method(adev)) {
5205 case AMD_RESET_METHOD_MODE1:
5206 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5207 break;
5208 case AMD_RESET_METHOD_MODE2:
5209 adev->mp1_state = PP_MP1_STATE_RESET;
5210 break;
5211 default:
5212 adev->mp1_state = PP_MP1_STATE_NONE;
5213 break;
5214 }
26bc5340 5215}
d38ceaf9 5216
e923be99 5217static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5218{
89041940 5219 amdgpu_vf_error_trans_all(adev);
a3a09142 5220 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5221}
5222
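/* Re-enable runtime PM of the HDA controller (PCI function 1 of the GPU)
 * that was suspended by amdgpu_device_suspend_display_audio() before reset.
 */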
3f12acc8
EQ
5223static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5224{
5225 struct pci_dev *p = NULL;
5226
5227 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5228 adev->pdev->bus->number, 1);
5229 if (p) {
5230 pm_runtime_enable(&(p->dev));
5231 pm_runtime_resume(&(p->dev));
5232 }
b85e285e
YY
5233
5234 pci_dev_put(p);
3f12acc8
EQ
5235}
5236
5237static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5238{
5239 enum amd_reset_method reset_method;
5240 struct pci_dev *p = NULL;
5241 u64 expires;
5242
5243 /*
5244 * For now, only BACO and mode1 reset are confirmed
5245 * to suffer the audio issue if audio is not properly suspended.
5246 */
5247 reset_method = amdgpu_asic_reset_method(adev);
5248 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5249 (reset_method != AMD_RESET_METHOD_MODE1))
5250 return -EINVAL;
5251
5252 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5253 adev->pdev->bus->number, 1);
5254 if (!p)
5255 return -ENODEV;
5256
5257 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5258 if (!expires)
5259 /*
5260 * If we cannot get the audio device autosuspend delay,
5261 * a fixed 4s interval will be used. Since 3s is the
5262 * audio controller's default autosuspend delay setting,
5263 * the 4s used here is guaranteed to cover that.
5264 */
54b7feb9 5265 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5266
5267 while (!pm_runtime_status_suspended(&(p->dev))) {
5268 if (!pm_runtime_suspend(&(p->dev)))
5269 break;
5270
5271 if (expires < ktime_get_mono_fast_ns()) {
5272 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5273 pci_dev_put(p);
3f12acc8
EQ
5274 /* TODO: abort the succeeding gpu reset? */
5275 return -ETIMEDOUT;
5276 }
5277 }
5278
5279 pm_runtime_disable(&(p->dev));
5280
b85e285e 5281 pci_dev_put(p);
3f12acc8
EQ
5282 return 0;
5283}
5284
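/* Cancel reset work that may already be queued from other sources (debugfs,
 * KFD, SR-IOV FLR, RAS recovery); the reset currently being performed covers
 * recovery for all of them.
 */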
d193b12b 5285static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5286{
5287 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5288
5289#if defined(CONFIG_DEBUG_FS)
5290 if (!amdgpu_sriov_vf(adev))
5291 cancel_work(&adev->reset_work);
5292#endif
5293
5294 if (adev->kfd.dev)
5295 cancel_work(&adev->kfd.reset_work);
5296
5297 if (amdgpu_sriov_vf(adev))
5298 cancel_work(&adev->virt.flr_work);
5299
5300 if (con && adev->ras_enabled)
5301 cancel_work(&con->recovery_work);
5302
5303}
5304
26bc5340 5305/**
6e9c65f7 5306 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5307 *
982a820b 5308 * @adev: amdgpu_device pointer
26bc5340 5309 * @job: which job triggered the hang
80bd2de1 5310 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5311 *
5312 * Attempt to reset the GPU if it has hung (all ASICs).
5313 * Attempt to do a soft reset or full reset and reinitialize the ASIC.
5314 * Returns 0 for success or an error on failure.
5315 */
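/* A caller such as the job timeout handler is expected to provide a
 * zero-initialized reset context; a minimal sketch might look like:
 *
 *	struct amdgpu_reset_context reset_context;
 *
 *	memset(&reset_context, 0, sizeof(reset_context));
 *	reset_context.method = AMD_RESET_METHOD_NONE;
 *	reset_context.reset_req_dev = adev;
 *	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 *	r = amdgpu_device_gpu_recover(adev, job, &reset_context);
 *
 * The exact flags depend on the caller; see amdgpu_pci_slot_reset() below
 * for another example of filling the context.
 */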
5316
cf727044 5317int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5318 struct amdgpu_job *job,
5319 struct amdgpu_reset_context *reset_context)
26bc5340 5320{
1d721ed6 5321 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5322 bool job_signaled = false;
26bc5340 5323 struct amdgpu_hive_info *hive = NULL;
26bc5340 5324 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5325 int i, r = 0;
bb5c7235 5326 bool need_emergency_restart = false;
3f12acc8 5327 bool audio_suspended = false;
f5c7e779
YC
5328 bool gpu_reset_for_dev_remove = false;
5329
5330 gpu_reset_for_dev_remove =
5331 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5332 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5333
6e3cd2a9 5334 /*
bb5c7235
WS
5335 * Special case: RAS triggered and full reset isn't supported
5336 */
5337 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5338
d5ea093e
AG
5339 /*
5340 * Flush RAM to disk so that after reboot
5341 * the user can read log and see why the system rebooted.
5342 */
bb5c7235 5343 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5344 DRM_WARN("Emergency reboot.");
5345
5346 ksys_sync_helper();
5347 emergency_restart();
5348 }
5349
b823821f 5350 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5351 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5352
175ac6ec
ZL
5353 if (!amdgpu_sriov_vf(adev))
5354 hive = amdgpu_get_xgmi_hive(adev);
681260df 5355 if (hive)
53b3f8f4 5356 mutex_lock(&hive->hive_lock);
26bc5340 5357
f1549c09
LG
5358 reset_context->job = job;
5359 reset_context->hive = hive;
9e94d22c
EQ
5360 /*
5361 * Build list of devices to reset.
5362 * In case we are in XGMI hive mode, resort the device list
5363 * to put adev in the 1st position.
5364 */
5365 INIT_LIST_HEAD(&device_list);
175ac6ec 5366 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5367 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5368 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5369 if (gpu_reset_for_dev_remove && adev->shutdown)
5370 tmp_adev->shutdown = true;
5371 }
655ce9cb 5372 if (!list_is_first(&adev->reset_list, &device_list))
5373 list_rotate_to_front(&adev->reset_list, &device_list);
5374 device_list_handle = &device_list;
26bc5340 5375 } else {
655ce9cb 5376 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5377 device_list_handle = &device_list;
5378 }
5379
e923be99
AG
5380 /* We need to lock reset domain only once both for XGMI and single device */
5381 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5382 reset_list);
3675c2f2 5383 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5384
1d721ed6 5385 /* block all schedulers and reset given job's ring */
655ce9cb 5386 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5387
e923be99 5388 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5389
3f12acc8
EQ
5390 /*
5391 * Try to put the audio codec into suspend state
5392 * before the gpu reset is started.
5393 *
5394 * This is because the power domain of the graphics
5395 * device is shared with the AZ power domain. Without
5396 * this, we may change the audio hardware behind the
5397 * audio driver's back, which will trigger some audio
5398 * codec errors.
5399 */
5400 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5401 audio_suspended = true;
5402
9e94d22c
EQ
5403 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5404
52fb44cf
EQ
5405 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5406
c004d44e 5407 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5408 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5409
12ffa55d
AG
5410 /*
5411 * Mark these ASICs to be reset as untracked first
5412 * and add them back after the reset has completed
5413 */
5414 amdgpu_unregister_gpu_instance(tmp_adev);
5415
163d4cd2 5416 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5417
f1c1314b 5418 /* disable ras on ALL IPs */
bb5c7235 5419 if (!need_emergency_restart &&
b823821f 5420 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5421 amdgpu_ras_suspend(tmp_adev);
5422
1d721ed6
AG
5423 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5424 struct amdgpu_ring *ring = tmp_adev->rings[i];
5425
5426 if (!ring || !ring->sched.thread)
5427 continue;
5428
0b2d2c2e 5429 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5430
bb5c7235 5431 if (need_emergency_restart)
7c6e68c7 5432 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5433 }
8f8c80f4 5434 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5435 }
5436
bb5c7235 5437 if (need_emergency_restart)
7c6e68c7
AG
5438 goto skip_sched_resume;
5439
1d721ed6
AG
5440 /*
5441 * Must check guilty signal here since after this point all old
5442 * HW fences are force signaled.
5443 *
5444 * job->base holds a reference to parent fence
5445 */
f6a3f660 5446 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5447 job_signaled = true;
1d721ed6
AG
5448 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5449 goto skip_hw_reset;
5450 }
5451
26bc5340 5452retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5453 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5454 if (gpu_reset_for_dev_remove) {
5455 /* Workaround for ASICs that need to disable SMC first */
5456 amdgpu_device_smu_fini_early(tmp_adev);
5457 }
f1549c09 5458 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5459 /* TODO: Should we stop? */
5460 if (r) {
aac89168 5461 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5462 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5463 tmp_adev->asic_reset_res = r;
5464 }
247c7b0d
AG
5465
5466 /*
5467 * Drop all pending non-scheduler resets. Scheduler resets
5468 * were already dropped during drm_sched_stop
5469 */
d193b12b 5470 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5471 }
5472
5473 /* Actual ASIC resets if needed.*/
4f30d920 5474 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5475 if (amdgpu_sriov_vf(adev)) {
5476 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5477 if (r)
5478 adev->asic_reset_res = r;
950d6425 5479
28606c4e
YC
5480 /* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so we need to resume RAS during reset */
5481 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
5482 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
950d6425 5483 amdgpu_ras_resume(adev);
26bc5340 5484 } else {
f1549c09 5485 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5486 if (r && r == -EAGAIN)
26bc5340 5487 goto retry;
f5c7e779
YC
5488
5489 if (!r && gpu_reset_for_dev_remove)
5490 goto recover_end;
26bc5340
AG
5491 }
5492
1d721ed6
AG
5493skip_hw_reset:
5494
26bc5340 5495 /* Post ASIC reset for all devs. */
655ce9cb 5496 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5497
1d721ed6
AG
5498 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5499 struct amdgpu_ring *ring = tmp_adev->rings[i];
5500
5501 if (!ring || !ring->sched.thread)
5502 continue;
5503
6868a2c4 5504 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5505 }
5506
693073a0 5507 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
ed67f729
JX
5508 amdgpu_mes_self_test(tmp_adev);
5509
1053b9c9 5510 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
4a580877 5511 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
5512 }
5513
7258fa31
SK
5514 if (tmp_adev->asic_reset_res)
5515 r = tmp_adev->asic_reset_res;
5516
1d721ed6 5517 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5518
5519 if (r) {
5520 /* bad news, how to tell it to userspace? */
12ffa55d 5521 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5522 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5523 } else {
12ffa55d 5524 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5525 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5526 DRM_WARN("smart shift update failed\n");
26bc5340 5527 }
7c6e68c7 5528 }
26bc5340 5529
7c6e68c7 5530skip_sched_resume:
655ce9cb 5531 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5532 /* unlock kfd: SRIOV would do it separately */
c004d44e 5533 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5534 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5535
5536 /* kfd_post_reset will do nothing if the kfd device is not initialized,
5537 * so bring up kfd here if it was not initialized before
5538 */
5539 if (!adev->kfd.init_complete)
5540 amdgpu_amdkfd_device_init(adev);
5541
3f12acc8
EQ
5542 if (audio_suspended)
5543 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5544
5545 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5546
5547 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5548 }
5549
f5c7e779 5550recover_end:
e923be99
AG
5551 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5552 reset_list);
5553 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5554
9e94d22c 5555 if (hive) {
9e94d22c 5556 mutex_unlock(&hive->hive_lock);
d95e8e97 5557 amdgpu_put_xgmi_hive(hive);
9e94d22c 5558 }
26bc5340 5559
f287a3c5 5560 if (r)
26bc5340 5561 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5562
5563 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5564 return r;
5565}
5566
e3ecdffa
AD
5567/**
5568 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5569 *
5570 * @adev: amdgpu_device pointer
5571 *
5572 * Fetches and stores in the driver the PCIE capabilities (gen speed
5573 * and lanes) of the slot the device is in. Handles APUs and
5574 * virtualized environments where PCIE config space may not be available.
5575 */
5494d864 5576static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5577{
5d9a6330 5578 struct pci_dev *pdev;
c5313457
HK
5579 enum pci_bus_speed speed_cap, platform_speed_cap;
5580 enum pcie_link_width platform_link_width;
d0dd7f0c 5581
cd474ba0
AD
5582 if (amdgpu_pcie_gen_cap)
5583 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5584
cd474ba0
AD
5585 if (amdgpu_pcie_lane_cap)
5586 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5587
cd474ba0
AD
5588 /* covers APUs as well */
5589 if (pci_is_root_bus(adev->pdev->bus)) {
5590 if (adev->pm.pcie_gen_mask == 0)
5591 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5592 if (adev->pm.pcie_mlw_mask == 0)
5593 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5594 return;
cd474ba0 5595 }
d0dd7f0c 5596
c5313457
HK
5597 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5598 return;
5599
dbaa922b
AD
5600 pcie_bandwidth_available(adev->pdev, NULL,
5601 &platform_speed_cap, &platform_link_width);
c5313457 5602
cd474ba0 5603 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5604 /* asic caps */
5605 pdev = adev->pdev;
5606 speed_cap = pcie_get_speed_cap(pdev);
5607 if (speed_cap == PCI_SPEED_UNKNOWN) {
5608 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5609 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5610 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5611 } else {
2b3a1f51
FX
5612 if (speed_cap == PCIE_SPEED_32_0GT)
5613 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5614 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5615 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5616 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5617 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5618 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5619 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5620 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5621 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5622 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5623 else if (speed_cap == PCIE_SPEED_8_0GT)
5624 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5625 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5626 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5627 else if (speed_cap == PCIE_SPEED_5_0GT)
5628 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5629 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5630 else
5631 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5632 }
5633 /* platform caps */
c5313457 5634 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5635 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5636 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5637 } else {
2b3a1f51
FX
5638 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5639 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5640 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5641 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5642 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5643 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5644 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5645 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5646 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5647 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5648 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5649 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5650 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5651 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5652 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5653 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5654 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5655 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5656 else
5657 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5658
cd474ba0
AD
5659 }
5660 }
5661 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5662 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5663 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5664 } else {
c5313457 5665 switch (platform_link_width) {
5d9a6330 5666 case PCIE_LNK_X32:
cd474ba0
AD
5667 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5668 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5669 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5670 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5671 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5672 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5673 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5674 break;
5d9a6330 5675 case PCIE_LNK_X16:
cd474ba0
AD
5676 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5677 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5678 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5679 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5680 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5681 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5682 break;
5d9a6330 5683 case PCIE_LNK_X12:
cd474ba0
AD
5684 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5685 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5686 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5687 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5688 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5689 break;
5d9a6330 5690 case PCIE_LNK_X8:
cd474ba0
AD
5691 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5692 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5693 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5694 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5695 break;
5d9a6330 5696 case PCIE_LNK_X4:
cd474ba0
AD
5697 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5698 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5699 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5700 break;
5d9a6330 5701 case PCIE_LNK_X2:
cd474ba0
AD
5702 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5703 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5704 break;
5d9a6330 5705 case PCIE_LNK_X1:
cd474ba0
AD
5706 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5707 break;
5708 default:
5709 break;
5710 }
d0dd7f0c
AD
5711 }
5712 }
5713}
d38ceaf9 5714
08a2fd23
RE
5715/**
5716 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5717 *
5718 * @adev: amdgpu_device pointer
5719 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5720 *
5721 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5722 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5723 * @peer_adev.
5724 */
5725bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5726 struct amdgpu_device *peer_adev)
5727{
5728#ifdef CONFIG_HSA_AMD_P2P
5729 uint64_t address_mask = peer_adev->dev->dma_mask ?
5730 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5731 resource_size_t aper_limit =
5732 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5733 bool p2p_access =
5734 !adev->gmc.xgmi.connected_to_cpu &&
5735 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5736
5737 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5738 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5739 !(adev->gmc.aper_base & address_mask ||
5740 aper_limit & address_mask));
5741#else
5742 return false;
5743#endif
5744}
5745
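/* Enter BACO (Bus Active, Chip Off). When RAS is enabled, doorbell
 * interrupts are disabled first so nothing pokes the chip while it is
 * powered down; the actual BACO entry is delegated to the DPM code.
 */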
361dbd01
AD
5746int amdgpu_device_baco_enter(struct drm_device *dev)
5747{
1348969a 5748 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5749 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5750
6ab68650 5751 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5752 return -ENOTSUPP;
5753
8ab0d6f0 5754 if (ras && adev->ras_enabled &&
acdae216 5755 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5756 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5757
9530273e 5758 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5759}
5760
5761int amdgpu_device_baco_exit(struct drm_device *dev)
5762{
1348969a 5763 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5764 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5765 int ret = 0;
361dbd01 5766
6ab68650 5767 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5768 return -ENOTSUPP;
5769
9530273e
EQ
5770 ret = amdgpu_dpm_baco_exit(adev);
5771 if (ret)
5772 return ret;
7a22677b 5773
8ab0d6f0 5774 if (ras && adev->ras_enabled &&
acdae216 5775 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5776 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5777
1bece222
CL
5778 if (amdgpu_passthrough(adev) &&
5779 adev->nbio.funcs->clear_doorbell_interrupt)
5780 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5781
7a22677b 5782 return 0;
361dbd01 5783}
c9a6b82f
AG
5784
5785/**
5786 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5787 * @pdev: PCI device struct
5788 * @state: PCI channel state
5789 *
5790 * Description: Called when a PCI error is detected.
5791 *
5792 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5793 */
5794pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5795{
5796 struct drm_device *dev = pci_get_drvdata(pdev);
5797 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5798 int i;
c9a6b82f
AG
5799
5800 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5801
6894305c
AG
5802 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5803 DRM_WARN("No support for XGMI hive yet...");
5804 return PCI_ERS_RESULT_DISCONNECT;
5805 }
5806
e17e27f9
GC
5807 adev->pci_channel_state = state;
5808
c9a6b82f
AG
5809 switch (state) {
5810 case pci_channel_io_normal:
5811 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5812 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5813 case pci_channel_io_frozen:
5814 /*
d0fb18b5 5815 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5816 * to GPU during PCI error recovery
5817 */
3675c2f2 5818 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5819 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5820
5821 /*
5822 * Block any work scheduling as we do for regular GPU reset
5823 * for the duration of the recovery
5824 */
5825 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5826 struct amdgpu_ring *ring = adev->rings[i];
5827
5828 if (!ring || !ring->sched.thread)
5829 continue;
5830
5831 drm_sched_stop(&ring->sched, NULL);
5832 }
8f8c80f4 5833 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5834 return PCI_ERS_RESULT_NEED_RESET;
5835 case pci_channel_io_perm_failure:
5836 /* Permanent error, prepare for device removal */
5837 return PCI_ERS_RESULT_DISCONNECT;
5838 }
5839
5840 return PCI_ERS_RESULT_NEED_RESET;
5841}
5842
5843/**
5844 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5845 * @pdev: pointer to PCI device
5846 */
5847pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5848{
5849
5850 DRM_INFO("PCI error: mmio enabled callback!!\n");
5851
5852 /* TODO - dump whatever for debugging purposes */
5853
5854 /* This called only if amdgpu_pci_error_detected returns
5855 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5856 * works, no need to reset slot.
5857 */
5858
5859 return PCI_ERS_RESULT_RECOVERED;
5860}
5861
5862/**
5863 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5864 * @pdev: PCI device struct
5865 *
5866 * Description: This routine is called by the pci error recovery
5867 * code after the PCI slot has been reset, just before we
5868 * should resume normal operations.
5869 */
5870pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5871{
5872 struct drm_device *dev = pci_get_drvdata(pdev);
5873 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5874 int r, i;
04442bf7 5875 struct amdgpu_reset_context reset_context;
362c7b91 5876 u32 memsize;
7ac71382 5877 struct list_head device_list;
c9a6b82f
AG
5878
5879 DRM_INFO("PCI error: slot reset callback!!\n");
5880
04442bf7
LL
5881 memset(&reset_context, 0, sizeof(reset_context));
5882
7ac71382 5883 INIT_LIST_HEAD(&device_list);
655ce9cb 5884 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5885
362c7b91
AG
5886 /* wait for asic to come out of reset */
5887 msleep(500);
5888
7ac71382 5889 /* Restore PCI confspace */
c1dd4aa6 5890 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5891
362c7b91
AG
5892 /* confirm ASIC came out of reset */
5893 for (i = 0; i < adev->usec_timeout; i++) {
5894 memsize = amdgpu_asic_get_config_memsize(adev);
5895
5896 if (memsize != 0xffffffff)
5897 break;
5898 udelay(1);
5899 }
5900 if (memsize == 0xffffffff) {
5901 r = -ETIME;
5902 goto out;
5903 }
5904
04442bf7
LL
5905 reset_context.method = AMD_RESET_METHOD_NONE;
5906 reset_context.reset_req_dev = adev;
5907 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5908 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5909
7afefb81 5910 adev->no_hw_access = true;
04442bf7 5911 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5912 adev->no_hw_access = false;
c9a6b82f
AG
5913 if (r)
5914 goto out;
5915
04442bf7 5916 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5917
5918out:
c9a6b82f 5919 if (!r) {
c1dd4aa6
AG
5920 if (amdgpu_device_cache_pci_state(adev->pdev))
5921 pci_restore_state(adev->pdev);
5922
c9a6b82f
AG
5923 DRM_INFO("PCIe error recovery succeeded\n");
5924 } else {
5925 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5926 amdgpu_device_unset_mp1_state(adev);
5927 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5928 }
5929
5930 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5931}
5932
5933/**
5934 * amdgpu_pci_resume() - resume normal ops after PCI reset
5935 * @pdev: pointer to PCI device
5936 *
5937 * Called when the error recovery driver tells us that it's
505199a3 5938 * OK to resume normal operation.
c9a6b82f
AG
5939 */
5940void amdgpu_pci_resume(struct pci_dev *pdev)
5941{
5942 struct drm_device *dev = pci_get_drvdata(pdev);
5943 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5944 int i;
c9a6b82f 5945
c9a6b82f
AG
5946
5947 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5948
e17e27f9
GC
5949 /* Only continue execution for the case of pci_channel_io_frozen */
5950 if (adev->pci_channel_state != pci_channel_io_frozen)
5951 return;
5952
acd89fca
AG
5953 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5954 struct amdgpu_ring *ring = adev->rings[i];
5955
5956 if (!ring || !ring->sched.thread)
5957 continue;
5958
acd89fca
AG
5959 drm_sched_start(&ring->sched, true);
5960 }
5961
e923be99
AG
5962 amdgpu_device_unset_mp1_state(adev);
5963 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5964}
c1dd4aa6
AG
5965
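/* Save the device's PCI config space into adev->pci_state so it can later be
 * restored with amdgpu_device_load_pci_state(), e.g. during PCI error
 * recovery in amdgpu_pci_slot_reset() above. Returns true on success.
 */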
5966bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5967{
5968 struct drm_device *dev = pci_get_drvdata(pdev);
5969 struct amdgpu_device *adev = drm_to_adev(dev);
5970 int r;
5971
5972 r = pci_save_state(pdev);
5973 if (!r) {
5974 kfree(adev->pci_state);
5975
5976 adev->pci_state = pci_store_saved_state(pdev);
5977
5978 if (!adev->pci_state) {
5979 DRM_ERROR("Failed to store PCI saved state");
5980 return false;
5981 }
5982 } else {
5983 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5984 return false;
5985 }
5986
5987 return true;
5988}
5989
5990bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5991{
5992 struct drm_device *dev = pci_get_drvdata(pdev);
5993 struct amdgpu_device *adev = drm_to_adev(dev);
5994 int r;
5995
5996 if (!adev->pci_state)
5997 return false;
5998
5999 r = pci_load_saved_state(pdev, adev->pci_state);
6000
6001 if (!r) {
6002 pci_restore_state(pdev);
6003 } else {
6004 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6005 return false;
6006 }
6007
6008 return true;
6009}
6010
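/* Flush the HDP (host data path) cache so that pending CPU writes reach
 * VRAM. Skipped for APUs that are not running as passthrough guests and for
 * GPUs whose XGMI links connect directly to the CPU; the flush is emitted on
 * @ring when the ring supports it, otherwise done through the ASIC callback.
 */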
810085dd
EH
6011void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6012 struct amdgpu_ring *ring)
6013{
6014#ifdef CONFIG_X86_64
b818a5d3 6015 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6016 return;
6017#endif
6018 if (adev->gmc.xgmi.connected_to_cpu)
6019 return;
6020
6021 if (ring && ring->funcs->emit_hdp_flush)
6022 amdgpu_ring_emit_hdp_flush(ring);
6023 else
6024 amdgpu_asic_flush_hdp(adev, ring);
6025}
c1dd4aa6 6026
810085dd
EH
6027void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6028 struct amdgpu_ring *ring)
6029{
6030#ifdef CONFIG_X86_64
b818a5d3 6031 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6032 return;
6033#endif
6034 if (adev->gmc.xgmi.connected_to_cpu)
6035 return;
c1dd4aa6 6036
810085dd
EH
6037 amdgpu_asic_invalidate_hdp(adev, ring);
6038}
34f3a4a9 6039
89a7a870
AG
6040int amdgpu_in_reset(struct amdgpu_device *adev)
6041{
6042 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
6043}
6044
34f3a4a9
LY
6045/**
6046 * amdgpu_device_halt() - bring hardware to some kind of halt state
6047 *
6048 * @adev: amdgpu_device pointer
6049 *
6050 * Bring hardware to some kind of halt state so that no one can touch it
6051 * any more. It helps to maintain error context when an error occurs.
6052 * Compared to a simple hang, the system will stay stable at least for SSH
6053 * access. Then it should be trivial to inspect the hardware state and
6054 * see what's going on. Implemented as follows:
6055 *
6056 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6057 * clears all CPU mappings to device, disallows remappings through page faults
6058 * 2. amdgpu_irq_disable_all() disables all interrupts
6059 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6060 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6061 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6062 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6063 * flush any in flight DMA operations
6064 */
6065void amdgpu_device_halt(struct amdgpu_device *adev)
6066{
6067 struct pci_dev *pdev = adev->pdev;
e0f943b4 6068 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 6069
2c1c7ba4 6070 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
6071 drm_dev_unplug(ddev);
6072
6073 amdgpu_irq_disable_all(adev);
6074
6075 amdgpu_fence_driver_hw_fini(adev);
6076
6077 adev->no_hw_access = true;
6078
6079 amdgpu_device_unmap_mmio(adev);
6080
6081 pci_disable_device(pdev);
6082 pci_wait_for_pending_transaction(pdev);
6083}
86700a40
XD
6084
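/* Indirect PCIe port register read: the NBIO block exposes an index/data
 * register pair, so the target offset (@reg is a dword offset, hence the
 * "* 4") is written to the index register and the value is read back from
 * the data register, all under pcie_idx_lock to keep the two-step access
 * atomic.
 */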
6085u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6086 u32 reg)
6087{
6088 unsigned long flags, address, data;
6089 u32 r;
6090
6091 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6092 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6093
6094 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6095 WREG32(address, reg * 4);
6096 (void)RREG32(address);
6097 r = RREG32(data);
6098 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6099 return r;
6100}
6101
6102void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6103 u32 reg, u32 v)
6104{
6105 unsigned long flags, address, data;
6106
6107 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6108 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6109
6110 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6111 WREG32(address, reg * 4);
6112 (void)RREG32(address);
6113 WREG32(data, v);
6114 (void)RREG32(data);
6115 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6116}
68ce8b24
CK
6117
6118/**
6119 * amdgpu_device_switch_gang - switch to a new gang
6120 * @adev: amdgpu_device pointer
6121 * @gang: the gang to switch to
6122 *
6123 * Try to switch to a new gang.
6124 * Returns: NULL if we switched to the new gang or a reference to the current
6125 * gang leader.
6126 */
6127struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6128 struct dma_fence *gang)
6129{
6130 struct dma_fence *old = NULL;
6131
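	/* Atomically replace the current gang leader: re-read it under RCU,
	 * bail out if it is already @gang, hand a still-unsignaled old leader
	 * back to the caller to wait on, and otherwise try to install @gang
	 * with cmpxchg, retrying if the leader changed in the meantime.
	 */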
6132 do {
6133 dma_fence_put(old);
6134 rcu_read_lock();
6135 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6136 rcu_read_unlock();
6137
6138 if (old == gang)
6139 break;
6140
6141 if (!dma_fence_is_signaled(old))
6142 return old;
6143
6144 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6145 old, gang) != old);
6146
6147 dma_fence_put(old);
6148 return NULL;
6149}
220c8cc8
AD
6150
6151bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6152{
6153 switch (adev->asic_type) {
6154#ifdef CONFIG_DRM_AMDGPU_SI
6155 case CHIP_HAINAN:
6156#endif
6157 case CHIP_TOPAZ:
6158 /* chips with no display hardware */
6159 return false;
6160#ifdef CONFIG_DRM_AMDGPU_SI
6161 case CHIP_TAHITI:
6162 case CHIP_PITCAIRN:
6163 case CHIP_VERDE:
6164 case CHIP_OLAND:
6165#endif
6166#ifdef CONFIG_DRM_AMDGPU_CIK
6167 case CHIP_BONAIRE:
6168 case CHIP_HAWAII:
6169 case CHIP_KAVERI:
6170 case CHIP_KABINI:
6171 case CHIP_MULLINS:
6172#endif
6173 case CHIP_TONGA:
6174 case CHIP_FIJI:
6175 case CHIP_POLARIS10:
6176 case CHIP_POLARIS11:
6177 case CHIP_POLARIS12:
6178 case CHIP_VEGAM:
6179 case CHIP_CARRIZO:
6180 case CHIP_STONEY:
6181 /* chips with display hardware */
6182 return true;
6183 default:
6184 /* IP discovery */
6185 if (!adev->ip_versions[DCE_HWIP][0] ||
6186 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6187 return false;
6188 return true;
6189 }
6190}
81283fee
JZ
6191
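/* Poll @reg_addr until (value & @mask) == @expected_value. The timeout
 * counter (adev->usec_timeout iterations with a 1us delay) restarts whenever
 * the register value changes; on timeout a warning naming @reg_name is
 * printed and -ETIMEDOUT is returned.
 */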
6192uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6193 uint32_t inst, uint32_t reg_addr, char reg_name[],
6194 uint32_t expected_value, uint32_t mask)
6195{
6196 uint32_t ret = 0;
6197 uint32_t old_ = 0;
6198 uint32_t tmp_ = RREG32(reg_addr);
6199 uint32_t loop = adev->usec_timeout;
6200
6201 while ((tmp_ & (mask)) != (expected_value)) {
6202 if (old_ != tmp_) {
6203 loop = adev->usec_timeout;
6204 old_ = tmp_;
6205 } else
6206 udelay(1);
6207 tmp_ = RREG32(reg_addr);
6208 loop--;
6209 if (!loop) {
6210 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6211 inst, reg_name, (uint32_t)expected_value,
6212 (uint32_t)(tmp_ & (mask)));
6213 ret = -ETIMEDOUT;
6214 break;
6215 }
6216 }
6217 return ret;
6218}