drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
3d8785f6
SA
35#include <linux/devcoredump.h>
36#include <generated/utsrelease.h>
08a2fd23 37#include <linux/pci-p2pdma.h>
d37a3929 38#include <linux/apple-gmux.h>
fdf2f6c5 39
b7cdb41e 40#include <drm/drm_aperture.h>
4562236b 41#include <drm/drm_atomic_helper.h>
973ad627 42#include <drm/drm_crtc_helper.h>
45b64fd9 43#include <drm/drm_fb_helper.h>
fcd70cd3 44#include <drm/drm_probe_helper.h>
d38ceaf9
AD
45#include <drm/amdgpu_drm.h>
46#include <linux/vgaarb.h>
47#include <linux/vga_switcheroo.h>
48#include <linux/efi.h>
49#include "amdgpu.h"
f4b373f4 50#include "amdgpu_trace.h"
d38ceaf9
AD
51#include "amdgpu_i2c.h"
52#include "atom.h"
53#include "amdgpu_atombios.h"
a5bde2f9 54#include "amdgpu_atomfirmware.h"
d0dd7f0c 55#include "amd_pcie.h"
33f34802
KW
56#ifdef CONFIG_DRM_AMDGPU_SI
57#include "si.h"
58#endif
a2e73f56
AD
59#ifdef CONFIG_DRM_AMDGPU_CIK
60#include "cik.h"
61#endif
aaa36a97 62#include "vi.h"
460826e6 63#include "soc15.h"
0a5b8c7b 64#include "nv.h"
d38ceaf9 65#include "bif/bif_4_1_d.h"
bec86378 66#include <linux/firmware.h>
89041940 67#include "amdgpu_vf_error.h"
d38ceaf9 68
ba997709 69#include "amdgpu_amdkfd.h"
d2f52ac8 70#include "amdgpu_pm.h"
d38ceaf9 71
5183411b 72#include "amdgpu_xgmi.h"
c030f2e4 73#include "amdgpu_ras.h"
9c7c85f7 74#include "amdgpu_pmu.h"
bd607166 75#include "amdgpu_fru_eeprom.h"
04442bf7 76#include "amdgpu_reset.h"
5183411b 77
d5ea093e 78#include <linux/suspend.h>
c6a6e2db 79#include <drm/task_barrier.h>
3f12acc8 80#include <linux/pm_runtime.h>
d5ea093e 81
f89f8c6b
AG
82#include <drm/drm_drv.h>
83
3ad5dcfe
KHF
84#if IS_ENABLED(CONFIG_X86)
85#include <asm/intel-family.h>
86#endif
87
e2a75f88 88MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 89MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 90MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 91MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 92MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 93MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 94MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 95
2dc80b00 96#define AMDGPU_RESUME_MS 2000
7258fa31
SK
97#define AMDGPU_MAX_RETRY_LIMIT 2
98#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 99
b7cdb41e
ML
100static const struct drm_driver amdgpu_kms_driver;
101
050091ab 102const char *amdgpu_asic_name[] = {
da69c161
KW
103 "TAHITI",
104 "PITCAIRN",
105 "VERDE",
106 "OLAND",
107 "HAINAN",
d38ceaf9
AD
108 "BONAIRE",
109 "KAVERI",
110 "KABINI",
111 "HAWAII",
112 "MULLINS",
113 "TOPAZ",
114 "TONGA",
48299f95 115 "FIJI",
d38ceaf9 116 "CARRIZO",
139f4917 117 "STONEY",
2cc0c0b5
FC
118 "POLARIS10",
119 "POLARIS11",
c4642a47 120 "POLARIS12",
48ff108d 121 "VEGAM",
d4196f01 122 "VEGA10",
8fab806a 123 "VEGA12",
956fcddc 124 "VEGA20",
2ca8a5d2 125 "RAVEN",
d6c3b24e 126 "ARCTURUS",
1eee4228 127 "RENOIR",
d46b417a 128 "ALDEBARAN",
852a6626 129 "NAVI10",
d0f56dc2 130 "CYAN_SKILLFISH",
87dbad02 131 "NAVI14",
9802f5d7 132 "NAVI12",
ccaf72d3 133 "SIENNA_CICHLID",
ddd8fbe7 134 "NAVY_FLOUNDER",
4f1e9a76 135 "VANGOGH",
a2468e04 136 "DIMGREY_CAVEFISH",
6f169591 137 "BEIGE_GOBY",
ee9236b7 138 "YELLOW_CARP",
3ae695d6 139 "IP DISCOVERY",
d38ceaf9
AD
140 "LAST",
141};
142
dcea6e65
KR
143/**
144 * DOC: pcie_replay_count
145 *
146 * The amdgpu driver provides a sysfs API for reporting the total number
147 * of PCIe replays (NAKs)
148 * The file pcie_replay_count is used for this and returns the total
149 * number of replays as a sum of the NAKs generated and NAKs received
150 */
151
152static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 156 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
157 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158
36000c7a 159 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
160}
161
162static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
163 amdgpu_device_get_pcie_replay_count, NULL);
164
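/*
 * Illustrative sketch (userspace, not part of the driver): reading the
 * pcie_replay_count attribute exposed above through sysfs. The path assumes
 * the GPU is card0; the actual card index depends on the system.
 */
#include <stdio.h>

int main(void)
{
	char buf[32];
	FILE *f = fopen("/sys/class/drm/card0/device/pcie_replay_count", "r");

	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("PCIe replay count: %s", buf);
	fclose(f);
	return 0;
}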
5494d864
AD
165static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
166
bd607166
KR
167/**
168 * DOC: product_name
169 *
170 * The amdgpu driver provides a sysfs API for reporting the product name
171 * for the device
2c496a6c 172 * The file product_name is used for this and returns the product name
bd607166
KR
173 * as returned from the FRU.
174 * NOTE: This is only available for certain server cards
175 */
176
177static ssize_t amdgpu_device_get_product_name(struct device *dev,
178 struct device_attribute *attr, char *buf)
179{
180 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 181 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 182
36000c7a 183 return sysfs_emit(buf, "%s\n", adev->product_name);
bd607166
KR
184}
185
186static DEVICE_ATTR(product_name, S_IRUGO,
187 amdgpu_device_get_product_name, NULL);
188
189/**
190 * DOC: product_number
191 *
192 * The amdgpu driver provides a sysfs API for reporting the part number
193 * for the device
2c496a6c 194 * The file product_number is used for this and returns the part number
bd607166
KR
195 * as returned from the FRU.
196 * NOTE: This is only available for certain server cards
197 */
198
199static ssize_t amdgpu_device_get_product_number(struct device *dev,
200 struct device_attribute *attr, char *buf)
201{
202 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 203 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 204
36000c7a 205 return sysfs_emit(buf, "%s\n", adev->product_number);
bd607166
KR
206}
207
208static DEVICE_ATTR(product_number, S_IRUGO,
209 amdgpu_device_get_product_number, NULL);
210
211/**
212 * DOC: serial_number
213 *
214 * The amdgpu driver provides a sysfs API for reporting the serial number
215 * for the device
216 * The file serial_number is used for this and returns the serial number
217 * as returned from the FRU.
218 * NOTE: This is only available for certain server cards
219 */
220
221static ssize_t amdgpu_device_get_serial_number(struct device *dev,
222 struct device_attribute *attr, char *buf)
223{
224 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 225 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 226
36000c7a 227 return sysfs_emit(buf, "%s\n", adev->serial);
bd607166
KR
228}
229
230static DEVICE_ATTR(serial_number, S_IRUGO,
231 amdgpu_device_get_serial_number, NULL);
232
fd496ca8 233/**
b98c6299 234 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
235 *
236 * @dev: drm_device pointer
237 *
b98c6299 238 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
239 * otherwise return false.
240 */
b98c6299 241bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
242{
243 struct amdgpu_device *adev = drm_to_adev(dev);
244
b98c6299 245 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
246 return true;
247 return false;
248}
249
e3ecdffa 250/**
0330b848 251 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
252 *
253 * @dev: drm_device pointer
254 *
b98c6299 255 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
256 * otherwise return false.
257 */
31af062a 258bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 259{
1348969a 260 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 261
b98c6299
AD
262 if (adev->has_pr3 ||
263 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
264 return true;
265 return false;
266}
267
a69cba42
AD
268/**
269 * amdgpu_device_supports_baco - Does the device support BACO
270 *
271 * @dev: drm_device pointer
272 *
 273 * Returns true if the device supports BACO,
274 * otherwise return false.
275 */
276bool amdgpu_device_supports_baco(struct drm_device *dev)
277{
1348969a 278 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
279
280 return amdgpu_asic_supports_baco(adev);
281}
282
3fa8f89d
S
283/**
284 * amdgpu_device_supports_smart_shift - Is the device dGPU with
285 * smart shift support
286 *
287 * @dev: drm_device pointer
288 *
289 * Returns true if the device is a dGPU with Smart Shift support,
290 * otherwise returns false.
291 */
292bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
293{
294 return (amdgpu_device_supports_boco(dev) &&
295 amdgpu_acpi_is_power_shift_control_supported());
296}
297
6e3cd2a9
MCC
298/*
299 * VRAM access helper functions
300 */
301
e35e2b11 302/**
048af66b 303 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
304 *
305 * @adev: amdgpu_device pointer
306 * @pos: offset of the buffer in vram
307 * @buf: virtual address of the buffer in system memory
 308 * @size: read/write size; the buffer at @buf must hold at least @size bytes
309 * @write: true - write to vram, otherwise - read from vram
310 */
048af66b
KW
311void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
312 void *buf, size_t size, bool write)
e35e2b11 313{
e35e2b11 314 unsigned long flags;
048af66b
KW
315 uint32_t hi = ~0, tmp = 0;
316 uint32_t *data = buf;
ce05ac56 317 uint64_t last;
f89f8c6b 318 int idx;
ce05ac56 319
c58a863b 320 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 321 return;
9d11eb0d 322
048af66b
KW
323 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
324
325 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
326 for (last = pos + size; pos < last; pos += 4) {
327 tmp = pos >> 31;
328
329 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
330 if (tmp != hi) {
331 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
332 hi = tmp;
333 }
334 if (write)
335 WREG32_NO_KIQ(mmMM_DATA, *data++);
336 else
337 *data++ = RREG32_NO_KIQ(mmMM_DATA);
338 }
339
340 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
341 drm_dev_exit(idx);
342}
343
344/**
bbe04dec 345 * amdgpu_device_aper_access - access vram through the vram aperture
048af66b
KW
346 *
347 * @adev: amdgpu_device pointer
348 * @pos: offset of the buffer in vram
349 * @buf: virtual address of the buffer in system memory
 350 * @size: read/write size; the buffer at @buf must hold at least @size bytes
351 * @write: true - write to vram, otherwise - read from vram
352 *
 353 * The return value is the number of bytes that have been transferred.
354 */
355size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
356 void *buf, size_t size, bool write)
357{
9d11eb0d 358#ifdef CONFIG_64BIT
048af66b
KW
359 void __iomem *addr;
360 size_t count = 0;
361 uint64_t last;
362
363 if (!adev->mman.aper_base_kaddr)
364 return 0;
365
9d11eb0d
CK
366 last = min(pos + size, adev->gmc.visible_vram_size);
367 if (last > pos) {
048af66b
KW
368 addr = adev->mman.aper_base_kaddr + pos;
369 count = last - pos;
9d11eb0d
CK
370
371 if (write) {
372 memcpy_toio(addr, buf, count);
373 mb();
810085dd 374 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 375 } else {
810085dd 376 amdgpu_device_invalidate_hdp(adev, NULL);
9d11eb0d
CK
377 mb();
378 memcpy_fromio(buf, addr, count);
379 }
380
9d11eb0d 381 }
048af66b
KW
382
383 return count;
384#else
385 return 0;
9d11eb0d 386#endif
048af66b 387}
9d11eb0d 388
048af66b
KW
389/**
390 * amdgpu_device_vram_access - read/write a buffer in vram
391 *
392 * @adev: amdgpu_device pointer
393 * @pos: offset of the buffer in vram
394 * @buf: virtual address of the buffer in system memory
 395 * @size: read/write size; the buffer at @buf must hold at least @size bytes
396 * @write: true - write to vram, otherwise - read from vram
397 */
398void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
399 void *buf, size_t size, bool write)
400{
401 size_t count;
e35e2b11 402
048af66b
KW
 403 /* try to use the vram aperture to access vram first */
404 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
405 size -= count;
406 if (size) {
 407 /* use MM_INDEX/MM_DATA to access the rest of vram */
408 pos += count;
409 buf += count;
410 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
411 }
412}
413
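/*
 * Illustrative sketch (not part of the driver): how a caller could use
 * amdgpu_device_vram_access() to copy a few dwords out of VRAM. The helper
 * first tries the CPU-visible aperture and falls back to MM_INDEX/MM_DATA
 * for anything beyond it. The offset and size here are arbitrary examples.
 */
static void example_read_vram(struct amdgpu_device *adev)
{
	u32 data[8];

	/* read 32 bytes starting at VRAM offset 0x1000; write == false */
	amdgpu_device_vram_access(adev, 0x1000, data, sizeof(data), false);
}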
d38ceaf9 414/*
f7ee1874 415 * register access helper functions.
d38ceaf9 416 */
56b53c0b
DL
417
418/* Check if hw access should be skipped because of hotplug or device error */
419bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
420{
7afefb81 421 if (adev->no_hw_access)
56b53c0b
DL
422 return true;
423
424#ifdef CONFIG_LOCKDEP
425 /*
426 * This is a bit complicated to understand, so worth a comment. What we assert
427 * here is that the GPU reset is not running on another thread in parallel.
428 *
429 * For this we trylock the read side of the reset semaphore, if that succeeds
 430 * we know that the reset is not running in parallel.
431 *
432 * If the trylock fails we assert that we are either already holding the read
433 * side of the lock or are the reset thread itself and hold the write side of
434 * the lock.
435 */
436 if (in_task()) {
d0fb18b5
AG
437 if (down_read_trylock(&adev->reset_domain->sem))
438 up_read(&adev->reset_domain->sem);
56b53c0b 439 else
d0fb18b5 440 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
441 }
442#endif
443 return false;
444}
445
e3ecdffa 446/**
f7ee1874 447 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
448 *
449 * @adev: amdgpu_device pointer
450 * @reg: dword aligned register offset
451 * @acc_flags: access flags which require special behavior
452 *
453 * Returns the 32 bit value from the offset specified.
454 */
f7ee1874
HZ
455uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
456 uint32_t reg, uint32_t acc_flags)
d38ceaf9 457{
f4b373f4
TSD
458 uint32_t ret;
459
56b53c0b 460 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
461 return 0;
462
f7ee1874
HZ
463 if ((reg * 4) < adev->rmmio_size) {
464 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
465 amdgpu_sriov_runtime(adev) &&
d0fb18b5 466 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 467 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 468 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
469 } else {
470 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
471 }
472 } else {
473 ret = adev->pcie_rreg(adev, reg * 4);
81202807 474 }
bc992ba5 475
f7ee1874 476 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 477
f4b373f4 478 return ret;
d38ceaf9
AD
479}
480
421a2a30
ML
481/*
482 * MMIO register read with bytes helper functions
483 * @offset:bytes offset from MMIO start
484 *
485*/
486
e3ecdffa
AD
487/**
488 * amdgpu_mm_rreg8 - read a memory mapped IO register
489 *
490 * @adev: amdgpu_device pointer
491 * @offset: byte aligned register offset
492 *
493 * Returns the 8 bit value from the offset specified.
494 */
7cbbc745
AG
495uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
496{
56b53c0b 497 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
498 return 0;
499
421a2a30
ML
500 if (offset < adev->rmmio_size)
501 return (readb(adev->rmmio + offset));
502 BUG();
503}
504
505/*
 506 * MMIO register byte write helper function
 507 * @offset: byte offset from MMIO start
 508 * @value: the value to be written to the register
509 *
510*/
e3ecdffa
AD
511/**
 512 * amdgpu_mm_wreg8 - write a memory mapped IO register
513 *
514 * @adev: amdgpu_device pointer
515 * @offset: byte aligned register offset
516 * @value: 8 bit value to write
517 *
518 * Writes the value specified to the offset specified.
519 */
7cbbc745
AG
520void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
521{
56b53c0b 522 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
523 return;
524
421a2a30
ML
525 if (offset < adev->rmmio_size)
526 writeb(value, adev->rmmio + offset);
527 else
528 BUG();
529}
530
e3ecdffa 531/**
f7ee1874 532 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
533 *
534 * @adev: amdgpu_device pointer
535 * @reg: dword aligned register offset
536 * @v: 32 bit value to write to the register
537 * @acc_flags: access flags which require special behavior
538 *
539 * Writes the value specified to the offset specified.
540 */
f7ee1874
HZ
541void amdgpu_device_wreg(struct amdgpu_device *adev,
542 uint32_t reg, uint32_t v,
543 uint32_t acc_flags)
d38ceaf9 544{
56b53c0b 545 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
546 return;
547
f7ee1874
HZ
548 if ((reg * 4) < adev->rmmio_size) {
549 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
550 amdgpu_sriov_runtime(adev) &&
d0fb18b5 551 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 552 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 553 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
554 } else {
555 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
556 }
557 } else {
558 adev->pcie_wreg(adev, reg * 4, v);
81202807 559 }
bc992ba5 560
f7ee1874 561 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 562}
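/*
 * Illustrative sketch (not part of the driver): read-modify-write of an MMIO
 * register through the helpers above. In-tree code normally goes through the
 * RREG32()/WREG32() macros, which wrap these functions; the register offset
 * and bit used here are placeholders.
 */
static void example_set_bit(struct amdgpu_device *adev, uint32_t reg)
{
	uint32_t v = amdgpu_device_rreg(adev, reg, 0);

	amdgpu_device_wreg(adev, reg, v | BIT(0), 0);
}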
d38ceaf9 563
03f2abb0 564/**
4cc9f86f 565 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 566 *
71579346
RB
567 * @adev: amdgpu_device pointer
568 * @reg: mmio/rlc register
569 * @v: value to write
570 *
571 * this function is invoked only for the debugfs register access
03f2abb0 572 */
f7ee1874 573void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
8ed49dd1
VL
574 uint32_t reg, uint32_t v,
575 uint32_t xcc_id)
2e0cc4d4 576{
56b53c0b 577 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
578 return;
579
2e0cc4d4 580 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
581 adev->gfx.rlc.funcs &&
582 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 583 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
8ed49dd1 584 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
4cc9f86f
TSD
585 } else if ((reg * 4) >= adev->rmmio_size) {
586 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
587 } else {
588 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 589 }
d38ceaf9
AD
590}
591
1bba3683
HZ
592/**
593 * amdgpu_device_indirect_rreg - read an indirect register
594 *
595 * @adev: amdgpu_device pointer
22f453fb 596 * @reg_addr: indirect register address to read from
1bba3683
HZ
597 *
598 * Returns the value of indirect register @reg_addr
599 */
600u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
601 u32 reg_addr)
602{
65ba96e9 603 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
604 void __iomem *pcie_index_offset;
605 void __iomem *pcie_data_offset;
65ba96e9
HZ
606 u32 r;
607
608 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
609 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
610
611 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
612 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
613 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
614
615 writel(reg_addr, pcie_index_offset);
616 readl(pcie_index_offset);
617 r = readl(pcie_data_offset);
618 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
619
620 return r;
621}
622
0c552ed3
LM
623u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
624 u64 reg_addr)
625{
626 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
627 u32 r;
628 void __iomem *pcie_index_offset;
629 void __iomem *pcie_index_hi_offset;
630 void __iomem *pcie_data_offset;
631
632 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
633 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
634 if (adev->nbio.funcs->get_pcie_index_hi_offset)
635 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
636 else
637 pcie_index_hi = 0;
638
639 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
640 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
641 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
642 if (pcie_index_hi != 0)
643 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
644 pcie_index_hi * 4;
645
646 writel(reg_addr, pcie_index_offset);
647 readl(pcie_index_offset);
648 if (pcie_index_hi != 0) {
649 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
650 readl(pcie_index_hi_offset);
651 }
652 r = readl(pcie_data_offset);
653
654 /* clear the high bits */
655 if (pcie_index_hi != 0) {
656 writel(0, pcie_index_hi_offset);
657 readl(pcie_index_hi_offset);
658 }
659
660 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
661
662 return r;
663}
664
1bba3683
HZ
665/**
666 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
667 *
668 * @adev: amdgpu_device pointer
22f453fb 669 * @reg_addr: indirect register address to read from
1bba3683
HZ
670 *
671 * Returns the value of indirect register @reg_addr
672 */
673u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
674 u32 reg_addr)
675{
65ba96e9 676 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
677 void __iomem *pcie_index_offset;
678 void __iomem *pcie_data_offset;
65ba96e9
HZ
679 u64 r;
680
681 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
682 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
683
684 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
685 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
686 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
687
688 /* read low 32 bits */
689 writel(reg_addr, pcie_index_offset);
690 readl(pcie_index_offset);
691 r = readl(pcie_data_offset);
692 /* read high 32 bits */
693 writel(reg_addr + 4, pcie_index_offset);
694 readl(pcie_index_offset);
695 r |= ((u64)readl(pcie_data_offset) << 32);
696 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
697
698 return r;
699}
700
701/**
702 * amdgpu_device_indirect_wreg - write an indirect register address
703 *
704 * @adev: amdgpu_device pointer
1bba3683
HZ
705 * @reg_addr: indirect register offset
706 * @reg_data: indirect register data
707 *
708 */
709void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
710 u32 reg_addr, u32 reg_data)
711{
65ba96e9 712 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
713 void __iomem *pcie_index_offset;
714 void __iomem *pcie_data_offset;
715
65ba96e9
HZ
716 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
717 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
718
1bba3683
HZ
719 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
720 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
721 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
722
723 writel(reg_addr, pcie_index_offset);
724 readl(pcie_index_offset);
725 writel(reg_data, pcie_data_offset);
726 readl(pcie_data_offset);
727 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
728}
729
0c552ed3
LM
730void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
731 u64 reg_addr, u32 reg_data)
732{
733 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
734 void __iomem *pcie_index_offset;
735 void __iomem *pcie_index_hi_offset;
736 void __iomem *pcie_data_offset;
737
738 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
739 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
740 if (adev->nbio.funcs->get_pcie_index_hi_offset)
741 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
742 else
743 pcie_index_hi = 0;
744
745 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
746 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
747 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
748 if (pcie_index_hi != 0)
749 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
750 pcie_index_hi * 4;
751
752 writel(reg_addr, pcie_index_offset);
753 readl(pcie_index_offset);
754 if (pcie_index_hi != 0) {
755 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
756 readl(pcie_index_hi_offset);
757 }
758 writel(reg_data, pcie_data_offset);
759 readl(pcie_data_offset);
760
761 /* clear the high bits */
762 if (pcie_index_hi != 0) {
763 writel(0, pcie_index_hi_offset);
764 readl(pcie_index_hi_offset);
765 }
766
767 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
768}
769
1bba3683
HZ
770/**
771 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
772 *
773 * @adev: amdgpu_device pointer
1bba3683
HZ
774 * @reg_addr: indirect register offset
775 * @reg_data: indirect register data
776 *
777 */
778void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
779 u32 reg_addr, u64 reg_data)
780{
65ba96e9 781 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
782 void __iomem *pcie_index_offset;
783 void __iomem *pcie_data_offset;
784
65ba96e9
HZ
785 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
786 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
787
1bba3683
HZ
788 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
789 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
790 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
791
792 /* write low 32 bits */
793 writel(reg_addr, pcie_index_offset);
794 readl(pcie_index_offset);
795 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
796 readl(pcie_data_offset);
797 /* write high 32 bits */
798 writel(reg_addr + 4, pcie_index_offset);
799 readl(pcie_index_offset);
800 writel((u32)(reg_data >> 32), pcie_data_offset);
801 readl(pcie_data_offset);
802 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
803}
804
dabc114e
HZ
805/**
806 * amdgpu_device_get_rev_id - query device rev_id
807 *
808 * @adev: amdgpu_device pointer
809 *
810 * Return device rev_id
811 */
812u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
813{
814 return adev->nbio.funcs->get_rev_id(adev);
815}
816
d38ceaf9
AD
817/**
818 * amdgpu_invalid_rreg - dummy reg read function
819 *
982a820b 820 * @adev: amdgpu_device pointer
d38ceaf9
AD
821 * @reg: offset of register
822 *
823 * Dummy register read function. Used for register blocks
824 * that certain asics don't have (all asics).
825 * Returns the value in the register.
826 */
827static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
828{
829 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
830 BUG();
831 return 0;
832}
833
0c552ed3
LM
834static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
835{
836 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
837 BUG();
838 return 0;
839}
840
d38ceaf9
AD
841/**
842 * amdgpu_invalid_wreg - dummy reg write function
843 *
982a820b 844 * @adev: amdgpu_device pointer
d38ceaf9
AD
845 * @reg: offset of register
846 * @v: value to write to the register
847 *
 848 * Dummy register write function. Used for register blocks
849 * that certain asics don't have (all asics).
850 */
851static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
852{
853 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
854 reg, v);
855 BUG();
856}
857
0c552ed3
LM
858static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
859{
860 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
861 reg, v);
862 BUG();
863}
864
4fa1c6a6
TZ
865/**
866 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
867 *
982a820b 868 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
869 * @reg: offset of register
870 *
871 * Dummy register read function. Used for register blocks
872 * that certain asics don't have (all asics).
873 * Returns the value in the register.
874 */
875static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
876{
877 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
878 BUG();
879 return 0;
880}
881
882/**
883 * amdgpu_invalid_wreg64 - dummy reg write function
884 *
982a820b 885 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
886 * @reg: offset of register
887 * @v: value to write to the register
888 *
 889 * Dummy register write function. Used for register blocks
890 * that certain asics don't have (all asics).
891 */
892static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
893{
894 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
895 reg, v);
896 BUG();
897}
898
d38ceaf9
AD
899/**
900 * amdgpu_block_invalid_rreg - dummy reg read function
901 *
982a820b 902 * @adev: amdgpu_device pointer
d38ceaf9
AD
903 * @block: offset of instance
904 * @reg: offset of register
905 *
906 * Dummy register read function. Used for register blocks
907 * that certain asics don't have (all asics).
908 * Returns the value in the register.
909 */
910static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
911 uint32_t block, uint32_t reg)
912{
913 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
914 reg, block);
915 BUG();
916 return 0;
917}
918
919/**
920 * amdgpu_block_invalid_wreg - dummy reg write function
921 *
982a820b 922 * @adev: amdgpu_device pointer
d38ceaf9
AD
923 * @block: offset of instance
924 * @reg: offset of register
925 * @v: value to write to the register
926 *
 927 * Dummy register write function. Used for register blocks
928 * that certain asics don't have (all asics).
929 */
930static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
931 uint32_t block,
932 uint32_t reg, uint32_t v)
933{
934 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
935 reg, block, v);
936 BUG();
937}
938
4d2997ab
AD
939/**
940 * amdgpu_device_asic_init - Wrapper for atom asic_init
941 *
982a820b 942 * @adev: amdgpu_device pointer
4d2997ab
AD
943 *
944 * Does any asic specific work and then calls atom asic init.
945 */
946static int amdgpu_device_asic_init(struct amdgpu_device *adev)
947{
948 amdgpu_asic_pre_asic_init(adev);
949
5db392a0
LL
950 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
951 adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
85d1bcc6
HZ
952 return amdgpu_atomfirmware_asic_init(adev, true);
953 else
954 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
4d2997ab
AD
955}
956
e3ecdffa 957/**
7ccfd79f 958 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 959 *
982a820b 960 * @adev: amdgpu_device pointer
e3ecdffa
AD
961 *
962 * Allocates a scratch page of VRAM for use by various things in the
963 * driver.
964 */
7ccfd79f 965static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 966{
7ccfd79f
CK
967 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
968 AMDGPU_GEM_DOMAIN_VRAM |
969 AMDGPU_GEM_DOMAIN_GTT,
970 &adev->mem_scratch.robj,
971 &adev->mem_scratch.gpu_addr,
972 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
973}
974
e3ecdffa 975/**
7ccfd79f 976 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 977 *
982a820b 978 * @adev: amdgpu_device pointer
e3ecdffa
AD
979 *
980 * Frees the VRAM scratch page.
981 */
7ccfd79f 982static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 983{
7ccfd79f 984 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
985}
986
987/**
9c3f2b54 988 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
989 *
990 * @adev: amdgpu_device pointer
991 * @registers: pointer to the register array
992 * @array_size: size of the register array
993 *
 994 * Programs an array of registers with AND and OR masks.
995 * This is a helper for setting golden registers.
996 */
9c3f2b54
AD
997void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
998 const u32 *registers,
999 const u32 array_size)
d38ceaf9
AD
1000{
1001 u32 tmp, reg, and_mask, or_mask;
1002 int i;
1003
1004 if (array_size % 3)
1005 return;
1006
47fc644f 1007 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1008 reg = registers[i + 0];
1009 and_mask = registers[i + 1];
1010 or_mask = registers[i + 2];
1011
1012 if (and_mask == 0xffffffff) {
1013 tmp = or_mask;
1014 } else {
1015 tmp = RREG32(reg);
1016 tmp &= ~and_mask;
e0d07657
HZ
1017 if (adev->family >= AMDGPU_FAMILY_AI)
1018 tmp |= (or_mask & and_mask);
1019 else
1020 tmp |= or_mask;
d38ceaf9
AD
1021 }
1022 WREG32(reg, tmp);
1023 }
1024}
1025
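/*
 * Illustrative sketch (not part of the driver): a register sequence is a flat
 * array of {offset, and_mask, or_mask} triples. An and_mask of 0xffffffff
 * means "write or_mask directly"; anything else is a read-modify-write. The
 * offsets and values below are placeholders, not real golden settings.
 */
static const u32 example_golden_settings[] = {
	/* offset      and_mask     or_mask */
	0x000031c0, 0xffffffff, 0x00000042,	/* full overwrite */
	0x00009830, 0x0000ff00, 0x00001200,	/* masked update */
};

/*
 * amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *					    ARRAY_SIZE(example_golden_settings));
 */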
e3ecdffa
AD
1026/**
1027 * amdgpu_device_pci_config_reset - reset the GPU
1028 *
1029 * @adev: amdgpu_device pointer
1030 *
1031 * Resets the GPU using the pci config reset sequence.
1032 * Only applicable to asics prior to vega10.
1033 */
8111c387 1034void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1035{
1036 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1037}
1038
af484df8
AD
1039/**
1040 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1041 *
1042 * @adev: amdgpu_device pointer
1043 *
1044 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1045 */
1046int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1047{
1048 return pci_reset_function(adev->pdev);
1049}
1050
d38ceaf9 1051/*
06ec9070 1052 * amdgpu_device_wb_*()
455a7bc2 1053 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1054 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
1055 */
1056
1057/**
06ec9070 1058 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1059 *
1060 * @adev: amdgpu_device pointer
1061 *
1062 * Disables Writeback and frees the Writeback memory (all asics).
1063 * Used at driver shutdown.
1064 */
06ec9070 1065static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1066{
1067 if (adev->wb.wb_obj) {
a76ed485
AD
1068 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1069 &adev->wb.gpu_addr,
1070 (void **)&adev->wb.wb);
d38ceaf9
AD
1071 adev->wb.wb_obj = NULL;
1072 }
1073}
1074
1075/**
03f2abb0 1076 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1077 *
1078 * @adev: amdgpu_device pointer
1079 *
455a7bc2 1080 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1081 * Used at driver startup.
1082 * Returns 0 on success or an -error on failure.
1083 */
06ec9070 1084static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1085{
1086 int r;
1087
1088 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1089 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1090 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1091 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1092 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1093 (void **)&adev->wb.wb);
d38ceaf9
AD
1094 if (r) {
1095 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1096 return r;
1097 }
d38ceaf9
AD
1098
1099 adev->wb.num_wb = AMDGPU_MAX_WB;
1100 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1101
1102 /* clear wb memory */
73469585 1103 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1104 }
1105
1106 return 0;
1107}
1108
1109/**
131b4b36 1110 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1111 *
1112 * @adev: amdgpu_device pointer
1113 * @wb: wb index
1114 *
1115 * Allocate a wb slot for use by the driver (all asics).
1116 * Returns 0 on success or -EINVAL on failure.
1117 */
131b4b36 1118int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1119{
1120 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1121
97407b63 1122 if (offset < adev->wb.num_wb) {
7014285a 1123 __set_bit(offset, adev->wb.used);
63ae07ca 1124 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1125 return 0;
1126 } else {
1127 return -EINVAL;
1128 }
1129}
1130
d38ceaf9 1131/**
131b4b36 1132 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1133 *
1134 * @adev: amdgpu_device pointer
1135 * @wb: wb index
1136 *
1137 * Free a wb slot allocated for use by the driver (all asics)
1138 */
131b4b36 1139void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1140{
73469585 1141 wb >>= 3;
d38ceaf9 1142 if (wb < adev->wb.num_wb)
73469585 1143 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1144}
1145
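/*
 * Illustrative sketch (not part of the driver): a typical writeback slot
 * lifetime. amdgpu_device_wb_get() hands back a dword index into the
 * writeback page, so the CPU view is adev->wb.wb[idx] and the GPU address
 * is adev->wb.gpu_addr + idx * 4.
 */
static int example_use_wb(struct amdgpu_device *adev)
{
	u32 idx;
	int r;

	r = amdgpu_device_wb_get(adev, &idx);
	if (r)
		return r;

	adev->wb.wb[idx] = 0;	/* clear the slot before handing it to the HW */
	/* ... point a fence/rptr at adev->wb.gpu_addr + idx * 4 here ... */

	amdgpu_device_wb_free(adev, idx);
	return 0;
}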
d6895ad3
CK
1146/**
1147 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1148 *
1149 * @adev: amdgpu_device pointer
1150 *
1151 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 1152 * to fail, but if any of the BARs is not accessible after the resize we abort
1153 * driver loading by returning -ENODEV.
1154 */
1155int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1156{
453f617a 1157 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1158 struct pci_bus *root;
1159 struct resource *res;
1160 unsigned i;
d6895ad3
CK
1161 u16 cmd;
1162 int r;
1163
822130b5
AB
1164 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1165 return 0;
1166
0c03b912 1167 /* Bypass for VF */
1168 if (amdgpu_sriov_vf(adev))
1169 return 0;
1170
b7221f2b
AD
1171 /* skip if the bios has already enabled large BAR */
1172 if (adev->gmc.real_vram_size &&
1173 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1174 return 0;
1175
31b8adab
CK
1176 /* Check if the root BUS has 64bit memory resources */
1177 root = adev->pdev->bus;
1178 while (root->parent)
1179 root = root->parent;
1180
1181 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1182 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1183 res->start > 0x100000000ull)
1184 break;
1185 }
1186
1187 /* Trying to resize is pointless without a root hub window above 4GB */
1188 if (!res)
1189 return 0;
1190
453f617a
ND
1191 /* Limit the BAR size to what is available */
1192 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1193 rbar_size);
1194
d6895ad3
CK
1195 /* Disable memory decoding while we change the BAR addresses and size */
1196 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1197 pci_write_config_word(adev->pdev, PCI_COMMAND,
1198 cmd & ~PCI_COMMAND_MEMORY);
1199
1200 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1201 amdgpu_doorbell_fini(adev);
d6895ad3
CK
1202 if (adev->asic_type >= CHIP_BONAIRE)
1203 pci_release_resource(adev->pdev, 2);
1204
1205 pci_release_resource(adev->pdev, 0);
1206
1207 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1208 if (r == -ENOSPC)
1209 DRM_INFO("Not enough PCI address space for a large BAR.");
1210 else if (r && r != -ENOTSUPP)
1211 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1212
1213 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1214
1215 /* When the doorbell or fb BAR isn't available we have no chance of
1216 * using the device.
1217 */
43c064db 1218 r = amdgpu_doorbell_init(adev);
d6895ad3
CK
1219 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1220 return -ENODEV;
1221
1222 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1223
1224 return 0;
1225}
a05502e5 1226
9535a86a
SZ
1227static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1228{
1229 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU)) {
1230 return false;
1231 }
1232
1233 return true;
1234}
1235
d38ceaf9
AD
1236/*
1237 * GPU helpers function.
1238 */
1239/**
39c640c0 1240 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1241 *
1242 * @adev: amdgpu_device pointer
1243 *
c836fec5
JQ
1244 * Check if the asic has been initialized (all asics) at driver startup
1245 * or post is needed if hw reset is performed.
1246 * Returns true if need or false if not.
d38ceaf9 1247 */
39c640c0 1248bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1249{
1250 uint32_t reg;
1251
bec86378
ML
1252 if (amdgpu_sriov_vf(adev))
1253 return false;
1254
9535a86a
SZ
1255 if (!amdgpu_device_read_bios(adev))
1256 return false;
1257
bec86378 1258 if (amdgpu_passthrough(adev)) {
1da2c326
ML
 1259 /* for FIJI: in the whole-GPU pass-through virtualization case, after a VM
 1260 * reboot some old SMC firmware still needs the driver to do vPost, otherwise
 1261 * the GPU hangs. SMC firmware versions above 22.15 do not have this flaw, so
 1262 * force vPost only for SMC versions below 22.15.
 1263 */
1264 if (adev->asic_type == CHIP_FIJI) {
1265 int err;
1266 uint32_t fw_ver;
1267 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
 1268 /* force vPost if an error occurred */
1269 if (err)
1270 return true;
1271
1272 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1273 if (fw_ver < 0x00160e00)
1274 return true;
bec86378 1275 }
bec86378 1276 }
91fe77eb 1277
e3c1b071 1278 /* Don't post if we need to reset whole hive on init */
1279 if (adev->gmc.xgmi.pending_reset)
1280 return false;
1281
91fe77eb 1282 if (adev->has_hw_reset) {
1283 adev->has_hw_reset = false;
1284 return true;
1285 }
1286
1287 /* bios scratch used on CIK+ */
1288 if (adev->asic_type >= CHIP_BONAIRE)
1289 return amdgpu_atombios_scratch_need_asic_init(adev);
1290
1291 /* check MEM_SIZE for older asics */
1292 reg = amdgpu_asic_get_config_memsize(adev);
1293
1294 if ((reg != 0) && (reg != 0xffffffff))
1295 return false;
1296
1297 return true;
bec86378
ML
1298}
1299
5d1eb4c4
ML
1300/*
1301 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
1302 * speed switching. Until we have confirmation from Intel that a specific host
1303 * supports it, it's safer that we keep it disabled for all.
1304 *
1305 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1306 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1307 */
1308bool amdgpu_device_pcie_dynamic_switching_supported(void)
1309{
1310#if IS_ENABLED(CONFIG_X86)
1311 struct cpuinfo_x86 *c = &cpu_data(0);
1312
1313 if (c->x86_vendor == X86_VENDOR_INTEL)
1314 return false;
1315#endif
1316 return true;
1317}
1318
0ab5d711
ML
1319/**
1320 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1321 *
1322 * @adev: amdgpu_device pointer
1323 *
1324 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1325 * be set for this device.
1326 *
1327 * Returns true if it should be used or false if not.
1328 */
1329bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1330{
1331 switch (amdgpu_aspm) {
1332 case -1:
1333 break;
1334 case 0:
1335 return false;
1336 case 1:
1337 return true;
1338 default:
1339 return false;
1340 }
1341 return pcie_aspm_enabled(adev->pdev);
1342}
1343
3ad5dcfe
KHF
1344bool amdgpu_device_aspm_support_quirk(void)
1345{
1346#if IS_ENABLED(CONFIG_X86)
1347 struct cpuinfo_x86 *c = &cpu_data(0);
1348
1349 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1350#else
1351 return true;
1352#endif
1353}
1354
d38ceaf9
AD
1355/* if we get transitioned to only one device, take VGA back */
1356/**
06ec9070 1357 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1358 *
bf44e8ce 1359 * @pdev: PCI device pointer
d38ceaf9
AD
1360 * @state: enable/disable vga decode
1361 *
1362 * Enable/disable vga decode (all asics).
1363 * Returns VGA resource flags.
1364 */
bf44e8ce
CH
1365static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1366 bool state)
d38ceaf9 1367{
bf44e8ce 1368 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
d38ceaf9
AD
1369 amdgpu_asic_set_vga_state(adev, state);
1370 if (state)
1371 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1372 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1373 else
1374 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1375}
1376
e3ecdffa
AD
1377/**
1378 * amdgpu_device_check_block_size - validate the vm block size
1379 *
1380 * @adev: amdgpu_device pointer
1381 *
1382 * Validates the vm block size specified via module parameter.
1383 * The vm block size defines number of bits in page table versus page directory,
1384 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1385 * page table and the remaining bits are in the page directory.
1386 */
06ec9070 1387static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1388{
1389 /* defines number of bits in page table versus page directory,
1390 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1391 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1392 if (amdgpu_vm_block_size == -1)
1393 return;
a1adf8be 1394
bab4fee7 1395 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1396 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1397 amdgpu_vm_block_size);
97489129 1398 amdgpu_vm_block_size = -1;
a1adf8be 1399 }
a1adf8be
CZ
1400}
1401
e3ecdffa
AD
1402/**
1403 * amdgpu_device_check_vm_size - validate the vm size
1404 *
1405 * @adev: amdgpu_device pointer
1406 *
1407 * Validates the vm size in GB specified via module parameter.
1408 * The VM size is the size of the GPU virtual memory space in GB.
1409 */
06ec9070 1410static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1411{
64dab074
AD
1412 /* no need to check the default value */
1413 if (amdgpu_vm_size == -1)
1414 return;
1415
83ca145d
ZJ
1416 if (amdgpu_vm_size < 1) {
1417 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1418 amdgpu_vm_size);
f3368128 1419 amdgpu_vm_size = -1;
83ca145d 1420 }
83ca145d
ZJ
1421}
1422
7951e376
RZ
1423static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1424{
1425 struct sysinfo si;
a9d4fe2f 1426 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1427 uint64_t total_memory;
1428 uint64_t dram_size_seven_GB = 0x1B8000000;
1429 uint64_t dram_size_three_GB = 0xB8000000;
1430
1431 if (amdgpu_smu_memory_pool_size == 0)
1432 return;
1433
1434 if (!is_os_64) {
1435 DRM_WARN("Not 64-bit OS, feature not supported\n");
1436 goto def_value;
1437 }
1438 si_meminfo(&si);
1439 total_memory = (uint64_t)si.totalram * si.mem_unit;
1440
1441 if ((amdgpu_smu_memory_pool_size == 1) ||
1442 (amdgpu_smu_memory_pool_size == 2)) {
1443 if (total_memory < dram_size_three_GB)
1444 goto def_value1;
1445 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1446 (amdgpu_smu_memory_pool_size == 8)) {
1447 if (total_memory < dram_size_seven_GB)
1448 goto def_value1;
1449 } else {
1450 DRM_WARN("Smu memory pool size not supported\n");
1451 goto def_value;
1452 }
1453 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1454
1455 return;
1456
1457def_value1:
1458 DRM_WARN("No enough system memory\n");
1459def_value:
1460 adev->pm.smu_prv_buffer_size = 0;
1461}
1462
9f6a7857
HR
1463static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1464{
1465 if (!(adev->flags & AMD_IS_APU) ||
1466 adev->asic_type < CHIP_RAVEN)
1467 return 0;
1468
1469 switch (adev->asic_type) {
1470 case CHIP_RAVEN:
1471 if (adev->pdev->device == 0x15dd)
1472 adev->apu_flags |= AMD_APU_IS_RAVEN;
1473 if (adev->pdev->device == 0x15d8)
1474 adev->apu_flags |= AMD_APU_IS_PICASSO;
1475 break;
1476 case CHIP_RENOIR:
1477 if ((adev->pdev->device == 0x1636) ||
1478 (adev->pdev->device == 0x164c))
1479 adev->apu_flags |= AMD_APU_IS_RENOIR;
1480 else
1481 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1482 break;
1483 case CHIP_VANGOGH:
1484 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1485 break;
1486 case CHIP_YELLOW_CARP:
1487 break;
d0f56dc2 1488 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1489 if ((adev->pdev->device == 0x13FE) ||
1490 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1491 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1492 break;
9f6a7857 1493 default:
4eaf21b7 1494 break;
9f6a7857
HR
1495 }
1496
1497 return 0;
1498}
1499
d38ceaf9 1500/**
06ec9070 1501 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1502 *
1503 * @adev: amdgpu_device pointer
1504 *
1505 * Validates certain module parameters and updates
1506 * the associated values used by the driver (all asics).
1507 */
912dfc84 1508static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1509{
5b011235
CZ
1510 if (amdgpu_sched_jobs < 4) {
1511 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1512 amdgpu_sched_jobs);
1513 amdgpu_sched_jobs = 4;
47fc644f 1514 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1515 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1516 amdgpu_sched_jobs);
1517 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1518 }
d38ceaf9 1519
83e74db6 1520 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1521 /* gart size must be greater or equal to 32M */
1522 dev_warn(adev->dev, "gart size (%d) too small\n",
1523 amdgpu_gart_size);
83e74db6 1524 amdgpu_gart_size = -1;
d38ceaf9
AD
1525 }
1526
36d38372 1527 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1528 /* gtt size must be greater or equal to 32M */
36d38372
CK
1529 dev_warn(adev->dev, "gtt size (%d) too small\n",
1530 amdgpu_gtt_size);
1531 amdgpu_gtt_size = -1;
d38ceaf9
AD
1532 }
1533
d07f14be
RH
1534 /* valid range is between 4 and 9 inclusive */
1535 if (amdgpu_vm_fragment_size != -1 &&
1536 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1537 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1538 amdgpu_vm_fragment_size = -1;
1539 }
1540
5d5bd5e3
KW
1541 if (amdgpu_sched_hw_submission < 2) {
1542 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1543 amdgpu_sched_hw_submission);
1544 amdgpu_sched_hw_submission = 2;
1545 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1546 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1547 amdgpu_sched_hw_submission);
1548 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1549 }
1550
2656fd23
AG
1551 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1552 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1553 amdgpu_reset_method = -1;
1554 }
1555
7951e376
RZ
1556 amdgpu_device_check_smu_prv_buffer_size(adev);
1557
06ec9070 1558 amdgpu_device_check_vm_size(adev);
d38ceaf9 1559
06ec9070 1560 amdgpu_device_check_block_size(adev);
6a7f76e7 1561
19aede77 1562 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1563
e3c00faa 1564 return 0;
d38ceaf9
AD
1565}
1566
1567/**
1568 * amdgpu_switcheroo_set_state - set switcheroo state
1569 *
1570 * @pdev: pci dev pointer
1694467b 1571 * @state: vga_switcheroo state
d38ceaf9 1572 *
12024b17 1573 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1574 * the asics before or after it is powered up using ACPI methods.
1575 */
8aba21b7
LT
1576static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1577 enum vga_switcheroo_state state)
d38ceaf9
AD
1578{
1579 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1580 int r;
d38ceaf9 1581
b98c6299 1582 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1583 return;
1584
1585 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1586 pr_info("switched on\n");
d38ceaf9
AD
1587 /* don't suspend or resume card normally */
1588 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1589
8f66090b
TZ
1590 pci_set_power_state(pdev, PCI_D0);
1591 amdgpu_device_load_pci_state(pdev);
1592 r = pci_enable_device(pdev);
de185019
AD
1593 if (r)
1594 DRM_WARN("pci_enable_device failed (%d)\n", r);
1595 amdgpu_device_resume(dev, true);
d38ceaf9 1596
d38ceaf9 1597 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1598 } else {
dd4fa6c1 1599 pr_info("switched off\n");
d38ceaf9 1600 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1601 amdgpu_device_suspend(dev, true);
8f66090b 1602 amdgpu_device_cache_pci_state(pdev);
de185019 1603 /* Shut down the device */
8f66090b
TZ
1604 pci_disable_device(pdev);
1605 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1606 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1607 }
1608}
1609
1610/**
1611 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1612 *
1613 * @pdev: pci dev pointer
1614 *
 1615 * Callback for the switcheroo driver. Checks whether the switcheroo
 1616 * state can be changed.
1617 * Returns true if the state can be changed, false if not.
1618 */
1619static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1620{
1621 struct drm_device *dev = pci_get_drvdata(pdev);
1622
1623 /*
1624 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1625 * locking inversion with the driver load path. And the access here is
1626 * completely racy anyway. So don't bother with locking for now.
1627 */
7e13ad89 1628 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1629}
1630
1631static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1632 .set_gpu_state = amdgpu_switcheroo_set_state,
1633 .reprobe = NULL,
1634 .can_switch = amdgpu_switcheroo_can_switch,
1635};
1636
e3ecdffa
AD
1637/**
1638 * amdgpu_device_ip_set_clockgating_state - set the CG state
1639 *
87e3f136 1640 * @dev: amdgpu_device pointer
e3ecdffa
AD
1641 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1642 * @state: clockgating state (gate or ungate)
1643 *
1644 * Sets the requested clockgating state for all instances of
1645 * the hardware IP specified.
1646 * Returns the error code from the last instance.
1647 */
43fa561f 1648int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1649 enum amd_ip_block_type block_type,
1650 enum amd_clockgating_state state)
d38ceaf9 1651{
43fa561f 1652 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1653 int i, r = 0;
1654
1655 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1656 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1657 continue;
c722865a
RZ
1658 if (adev->ip_blocks[i].version->type != block_type)
1659 continue;
1660 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1661 continue;
1662 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1663 (void *)adev, state);
1664 if (r)
1665 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1666 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1667 }
1668 return r;
1669}
1670
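/*
 * Illustrative sketch (not part of the driver): requesting clock gating for
 * every instance of one IP type. The block type picked here is only an
 * example.
 */
static void example_gate_gfx_clocks(struct amdgpu_device *adev)
{
	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
					       AMD_CG_STATE_GATE);
}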
e3ecdffa
AD
1671/**
1672 * amdgpu_device_ip_set_powergating_state - set the PG state
1673 *
87e3f136 1674 * @dev: amdgpu_device pointer
e3ecdffa
AD
1675 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1676 * @state: powergating state (gate or ungate)
1677 *
1678 * Sets the requested powergating state for all instances of
1679 * the hardware IP specified.
1680 * Returns the error code from the last instance.
1681 */
43fa561f 1682int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1683 enum amd_ip_block_type block_type,
1684 enum amd_powergating_state state)
d38ceaf9 1685{
43fa561f 1686 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1687 int i, r = 0;
1688
1689 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1690 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1691 continue;
c722865a
RZ
1692 if (adev->ip_blocks[i].version->type != block_type)
1693 continue;
1694 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1695 continue;
1696 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1697 (void *)adev, state);
1698 if (r)
1699 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1700 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1701 }
1702 return r;
1703}
1704
e3ecdffa
AD
1705/**
1706 * amdgpu_device_ip_get_clockgating_state - get the CG state
1707 *
1708 * @adev: amdgpu_device pointer
1709 * @flags: clockgating feature flags
1710 *
1711 * Walks the list of IPs on the device and updates the clockgating
1712 * flags for each IP.
1713 * Updates @flags with the feature flags for each hardware IP where
1714 * clockgating is enabled.
1715 */
2990a1fc 1716void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1717 u64 *flags)
6cb2d4e4
HR
1718{
1719 int i;
1720
1721 for (i = 0; i < adev->num_ip_blocks; i++) {
1722 if (!adev->ip_blocks[i].status.valid)
1723 continue;
1724 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1725 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1726 }
1727}
1728
e3ecdffa
AD
1729/**
1730 * amdgpu_device_ip_wait_for_idle - wait for idle
1731 *
1732 * @adev: amdgpu_device pointer
1733 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1734 *
1735 * Waits for the requested hardware IP to be idle.
1736 * Returns 0 for success or a negative error code on failure.
1737 */
2990a1fc
AD
1738int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1739 enum amd_ip_block_type block_type)
5dbbb60b
AD
1740{
1741 int i, r;
1742
1743 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1744 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1745 continue;
a1255107
AD
1746 if (adev->ip_blocks[i].version->type == block_type) {
1747 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1748 if (r)
1749 return r;
1750 break;
1751 }
1752 }
1753 return 0;
1754
1755}
1756
e3ecdffa
AD
1757/**
1758 * amdgpu_device_ip_is_idle - is the hardware IP idle
1759 *
1760 * @adev: amdgpu_device pointer
1761 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1762 *
1763 * Check if the hardware IP is idle or not.
1764 * Returns true if the IP is idle, false if not.
1765 */
2990a1fc
AD
1766bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1767 enum amd_ip_block_type block_type)
5dbbb60b
AD
1768{
1769 int i;
1770
1771 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1772 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1773 continue;
a1255107
AD
1774 if (adev->ip_blocks[i].version->type == block_type)
1775 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1776 }
1777 return true;
1778
1779}
1780
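/*
 * A minimal usage sketch, assuming a valid adev: one way to use these helpers
 * is to check amdgpu_device_ip_is_idle() and, if the block is busy, block in
 * amdgpu_device_ip_wait_for_idle() before touching state that requires the
 * IP to be quiescent.
 *
 *	if (!amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GFX)) {
 *		r = amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
 *		if (r)
 *			return r;
 *	}
 */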
e3ecdffa
AD
1781/**
1782 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1783 *
1784 * @adev: amdgpu_device pointer
87e3f136 1785 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1786 *
1787 * Returns a pointer to the hardware IP block structure
1788 * if it exists for the asic, otherwise NULL.
1789 */
2990a1fc
AD
1790struct amdgpu_ip_block *
1791amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1792 enum amd_ip_block_type type)
d38ceaf9
AD
1793{
1794 int i;
1795
1796 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1797 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1798 return &adev->ip_blocks[i];
1799
1800 return NULL;
1801}
1802
1803/**
2990a1fc 1804 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1805 *
1806 * @adev: amdgpu_device pointer
5fc3aeeb 1807 * @type: enum amd_ip_block_type
d38ceaf9
AD
1808 * @major: major version
1809 * @minor: minor version
1810 *
1811 * Returns 0 if the installed IP block's version is equal to or greater than
1812 * the one requested, or 1 if it is smaller or the ip_block doesn't exist.
1813 */
2990a1fc
AD
1814int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1815 enum amd_ip_block_type type,
1816 u32 major, u32 minor)
d38ceaf9 1817{
2990a1fc 1818 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1819
a1255107
AD
1820 if (ip_block && ((ip_block->version->major > major) ||
1821 ((ip_block->version->major == major) &&
1822 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1823 return 0;
1824
1825 return 1;
1826}
1827
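/*
 * A minimal usage sketch, assuming a valid adev: check whether the asic
 * carries at least a given version of an IP block (the version numbers below
 * are only examples). A return of 0 means equal or greater, 1 means smaller
 * or not present.
 *
 *	bool new_enough =
 *		!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *						    8, 1);
 */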
a1255107 1828/**
2990a1fc 1829 * amdgpu_device_ip_block_add
a1255107
AD
1830 *
1831 * @adev: amdgpu_device pointer
1832 * @ip_block_version: pointer to the IP to add
1833 *
1834 * Adds the IP block driver information to the collection of IPs
1835 * on the asic.
1836 */
2990a1fc
AD
1837int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1838 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1839{
1840 if (!ip_block_version)
1841 return -EINVAL;
1842
7bd939d0
LG
1843 switch (ip_block_version->type) {
1844 case AMD_IP_BLOCK_TYPE_VCN:
1845 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1846 return 0;
1847 break;
1848 case AMD_IP_BLOCK_TYPE_JPEG:
1849 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1850 return 0;
1851 break;
1852 default:
1853 break;
1854 }
1855
e966a725 1856 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1857 ip_block_version->funcs->name);
1858
a1255107
AD
1859 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1860
1861 return 0;
1862}
1863
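/*
 * A minimal usage sketch: asic setup code such as si_set_ip_blocks() or
 * vi_set_ip_blocks() calls this helper once per IP in the order the blocks
 * should be brought up. The block name below is a placeholder, not a real
 * symbol.
 *
 *	r = amdgpu_device_ip_block_add(adev, &foo_common_ip_block);
 *	if (r)
 *		return r;
 */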
e3ecdffa
AD
1864/**
1865 * amdgpu_device_enable_virtual_display - enable virtual display feature
1866 *
1867 * @adev: amdgpu_device pointer
1868 *
1869 * Enables the virtual display feature if the user has enabled it via
1870 * the module parameter virtual_display. This feature provides virtual
1871 * display hardware on headless boards or in virtualized environments.
1872 * This function parses and validates the configuration string specified by
1873 * the user and configures the virtual display settings (number of
1874 * virtual connectors, crtcs, etc.) specified.
1875 */
483ef985 1876static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1877{
1878 adev->enable_virtual_display = false;
1879
1880 if (amdgpu_virtual_display) {
8f66090b 1881 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1882 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1883
1884 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1885 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1886 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1887 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1888 if (!strcmp("all", pciaddname)
1889 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1890 long num_crtc;
1891 int res = -1;
1892
9accf2fd 1893 adev->enable_virtual_display = true;
0f66356d
ED
1894
1895 if (pciaddname_tmp)
1896 res = kstrtol(pciaddname_tmp, 10,
1897 &num_crtc);
1898
1899 if (!res) {
1900 if (num_crtc < 1)
1901 num_crtc = 1;
1902 if (num_crtc > 6)
1903 num_crtc = 6;
1904 adev->mode_info.num_crtc = num_crtc;
1905 } else {
1906 adev->mode_info.num_crtc = 1;
1907 }
9accf2fd
ED
1908 break;
1909 }
1910 }
1911
0f66356d
ED
1912 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1913 amdgpu_virtual_display, pci_address_name,
1914 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1915
1916 kfree(pciaddstr);
1917 }
1918}
1919
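/*
 * Illustrative parameter strings accepted by the parser above: a semicolon
 * separated list of "<pci address>[,<crtc count>]" entries, or "all" to match
 * every device; the crtc count is clamped to the 1..6 range. The pci address
 * below is only an example.
 *
 *	amdgpu.virtual_display=0000:02:00.0,2
 *	amdgpu.virtual_display=all,1
 */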
25263da3
AD
1920void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1921{
1922 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1923 adev->mode_info.num_crtc = 1;
1924 adev->enable_virtual_display = true;
1925 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1926 adev->enable_virtual_display, adev->mode_info.num_crtc);
1927 }
1928}
1929
e3ecdffa
AD
1930/**
1931 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1932 *
1933 * @adev: amdgpu_device pointer
1934 *
1935 * Parses the asic configuration parameters specified in the gpu info
1936 * firmware and makes them available to the driver for use in configuring
1937 * the asic.
1938 * Returns 0 on success, -EINVAL on failure.
1939 */
e2a75f88
AD
1940static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1941{
e2a75f88 1942 const char *chip_name;
c0a43457 1943 char fw_name[40];
e2a75f88
AD
1944 int err;
1945 const struct gpu_info_firmware_header_v1_0 *hdr;
1946
ab4fe3e1
HR
1947 adev->firmware.gpu_info_fw = NULL;
1948
72de33f8 1949 if (adev->mman.discovery_bin) {
cc375d8c
TY
1950 /*
1951 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 1952 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
1953 * when DAL no longer needs it.
1954 */
1955 if (adev->asic_type != CHIP_NAVI12)
1956 return 0;
258620d0
AD
1957 }
1958
e2a75f88 1959 switch (adev->asic_type) {
e2a75f88
AD
1960 default:
1961 return 0;
1962 case CHIP_VEGA10:
1963 chip_name = "vega10";
1964 break;
3f76dced
AD
1965 case CHIP_VEGA12:
1966 chip_name = "vega12";
1967 break;
2d2e5e7e 1968 case CHIP_RAVEN:
54f78a76 1969 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1970 chip_name = "raven2";
54f78a76 1971 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1972 chip_name = "picasso";
54c4d17e
FX
1973 else
1974 chip_name = "raven";
2d2e5e7e 1975 break;
65e60f6e
LM
1976 case CHIP_ARCTURUS:
1977 chip_name = "arcturus";
1978 break;
42b325e5
XY
1979 case CHIP_NAVI12:
1980 chip_name = "navi12";
1981 break;
e2a75f88
AD
1982 }
1983
1984 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 1985 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
1986 if (err) {
1987 dev_err(adev->dev,
b31d3063 1988 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
1989 fw_name);
1990 goto out;
1991 }
1992
ab4fe3e1 1993 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1994 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1995
1996 switch (hdr->version_major) {
1997 case 1:
1998 {
1999 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2000 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2001 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2002
cc375d8c
TY
2003 /*
2004 * Should be dropped when DAL no longer needs it.
2005 */
2006 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2007 goto parse_soc_bounding_box;
2008
b5ab16bf
AD
2009 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2010 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2011 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2012 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2013 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2014 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2015 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2016 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2017 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2018 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2019 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2020 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2021 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2022 adev->gfx.cu_info.max_waves_per_simd =
2023 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2024 adev->gfx.cu_info.max_scratch_slots_per_cu =
2025 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2026 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2027 if (hdr->version_minor >= 1) {
35c2e910
HZ
2028 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2029 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2030 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2031 adev->gfx.config.num_sc_per_sh =
2032 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2033 adev->gfx.config.num_packer_per_sc =
2034 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2035 }
ec51d3fa
XY
2036
2037parse_soc_bounding_box:
ec51d3fa
XY
2038 /*
2039 * soc bounding box info is not integrated in the discovery table,
258620d0 2040 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 2041 */
48321c3d
HW
2042 if (hdr->version_minor == 2) {
2043 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2044 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2045 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2046 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2047 }
e2a75f88
AD
2048 break;
2049 }
2050 default:
2051 dev_err(adev->dev,
2052 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2053 err = -EINVAL;
2054 goto out;
2055 }
2056out:
e2a75f88
AD
2057 return err;
2058}
2059
e3ecdffa
AD
2060/**
2061 * amdgpu_device_ip_early_init - run early init for hardware IPs
2062 *
2063 * @adev: amdgpu_device pointer
2064 *
2065 * Early initialization pass for hardware IPs. The hardware IPs that make
2066 * up each asic are discovered and each IP's early_init callback is run. This
2067 * is the first stage in initializing the asic.
2068 * Returns 0 on success, negative error code on failure.
2069 */
06ec9070 2070static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2071{
901e2be2
AD
2072 struct drm_device *dev = adev_to_drm(adev);
2073 struct pci_dev *parent;
aaa36a97 2074 int i, r;
ced69502 2075 bool total;
d38ceaf9 2076
483ef985 2077 amdgpu_device_enable_virtual_display(adev);
a6be7570 2078
00a979f3 2079 if (amdgpu_sriov_vf(adev)) {
00a979f3 2080 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2081 if (r)
2082 return r;
00a979f3
WS
2083 }
2084
d38ceaf9 2085 switch (adev->asic_type) {
33f34802
KW
2086#ifdef CONFIG_DRM_AMDGPU_SI
2087 case CHIP_VERDE:
2088 case CHIP_TAHITI:
2089 case CHIP_PITCAIRN:
2090 case CHIP_OLAND:
2091 case CHIP_HAINAN:
295d0daf 2092 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2093 r = si_set_ip_blocks(adev);
2094 if (r)
2095 return r;
2096 break;
2097#endif
a2e73f56
AD
2098#ifdef CONFIG_DRM_AMDGPU_CIK
2099 case CHIP_BONAIRE:
2100 case CHIP_HAWAII:
2101 case CHIP_KAVERI:
2102 case CHIP_KABINI:
2103 case CHIP_MULLINS:
e1ad2d53 2104 if (adev->flags & AMD_IS_APU)
a2e73f56 2105 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2106 else
2107 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2108
2109 r = cik_set_ip_blocks(adev);
2110 if (r)
2111 return r;
2112 break;
2113#endif
da87c30b
AD
2114 case CHIP_TOPAZ:
2115 case CHIP_TONGA:
2116 case CHIP_FIJI:
2117 case CHIP_POLARIS10:
2118 case CHIP_POLARIS11:
2119 case CHIP_POLARIS12:
2120 case CHIP_VEGAM:
2121 case CHIP_CARRIZO:
2122 case CHIP_STONEY:
2123 if (adev->flags & AMD_IS_APU)
2124 adev->family = AMDGPU_FAMILY_CZ;
2125 else
2126 adev->family = AMDGPU_FAMILY_VI;
2127
2128 r = vi_set_ip_blocks(adev);
2129 if (r)
2130 return r;
2131 break;
d38ceaf9 2132 default:
63352b7f
AD
2133 r = amdgpu_discovery_set_ip_blocks(adev);
2134 if (r)
2135 return r;
2136 break;
d38ceaf9
AD
2137 }
2138
901e2be2
AD
2139 if (amdgpu_has_atpx() &&
2140 (amdgpu_is_atpx_hybrid() ||
2141 amdgpu_has_atpx_dgpu_power_cntl()) &&
2142 ((adev->flags & AMD_IS_APU) == 0) &&
2143 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2144 adev->flags |= AMD_IS_PX;
2145
85ac2021
AD
2146 if (!(adev->flags & AMD_IS_APU)) {
2147 parent = pci_upstream_bridge(adev->pdev);
2148 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2149 }
901e2be2 2150
1884734a 2151
3b94fb10 2152 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2153 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2154 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2155 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2156 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2157
ced69502 2158 total = true;
d38ceaf9
AD
2159 for (i = 0; i < adev->num_ip_blocks; i++) {
2160 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2161 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2162 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2163 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2164 } else {
a1255107
AD
2165 if (adev->ip_blocks[i].version->funcs->early_init) {
2166 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2167 if (r == -ENOENT) {
a1255107 2168 adev->ip_blocks[i].status.valid = false;
2c1a2784 2169 } else if (r) {
a1255107
AD
2170 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2171 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2172 total = false;
2c1a2784 2173 } else {
a1255107 2174 adev->ip_blocks[i].status.valid = true;
2c1a2784 2175 }
974e6b64 2176 } else {
a1255107 2177 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2178 }
d38ceaf9 2179 }
21a249ca
AD
2180 /* get the vbios after the asic_funcs are set up */
2181 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2182 r = amdgpu_device_parse_gpu_info_fw(adev);
2183 if (r)
2184 return r;
2185
21a249ca 2186 /* Read BIOS */
9535a86a
SZ
2187 if (amdgpu_device_read_bios(adev)) {
2188 if (!amdgpu_get_bios(adev))
2189 return -EINVAL;
21a249ca 2190
9535a86a
SZ
2191 r = amdgpu_atombios_init(adev);
2192 if (r) {
2193 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2194 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2195 return r;
2196 }
21a249ca 2197 }
77eabc6f
PJZ
2198
2199 /*get pf2vf msg info at it's earliest time*/
2200 if (amdgpu_sriov_vf(adev))
2201 amdgpu_virt_init_data_exchange(adev);
2202
21a249ca 2203 }
d38ceaf9 2204 }
ced69502
ML
2205 if (!total)
2206 return -ENODEV;
d38ceaf9 2207
00fa4035 2208 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2209 adev->cg_flags &= amdgpu_cg_mask;
2210 adev->pg_flags &= amdgpu_pg_mask;
2211
d38ceaf9
AD
2212 return 0;
2213}
2214
0a4f2520
RZ
2215static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2216{
2217 int i, r;
2218
2219 for (i = 0; i < adev->num_ip_blocks; i++) {
2220 if (!adev->ip_blocks[i].status.sw)
2221 continue;
2222 if (adev->ip_blocks[i].status.hw)
2223 continue;
2224 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2225 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2226 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2227 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2228 if (r) {
2229 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2230 adev->ip_blocks[i].version->funcs->name, r);
2231 return r;
2232 }
2233 adev->ip_blocks[i].status.hw = true;
2234 }
2235 }
2236
2237 return 0;
2238}
2239
2240static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2241{
2242 int i, r;
2243
2244 for (i = 0; i < adev->num_ip_blocks; i++) {
2245 if (!adev->ip_blocks[i].status.sw)
2246 continue;
2247 if (adev->ip_blocks[i].status.hw)
2248 continue;
2249 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2250 if (r) {
2251 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2252 adev->ip_blocks[i].version->funcs->name, r);
2253 return r;
2254 }
2255 adev->ip_blocks[i].status.hw = true;
2256 }
2257
2258 return 0;
2259}
2260
7a3e0bb2
RZ
2261static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2262{
2263 int r = 0;
2264 int i;
80f41f84 2265 uint32_t smu_version;
7a3e0bb2
RZ
2266
2267 if (adev->asic_type >= CHIP_VEGA10) {
2268 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2269 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2270 continue;
2271
e3c1b071 2272 if (!adev->ip_blocks[i].status.sw)
2273 continue;
2274
482f0e53
ML
2275 /* no need to do the fw loading again if already done*/
2276 if (adev->ip_blocks[i].status.hw == true)
2277 break;
2278
53b3f8f4 2279 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2280 r = adev->ip_blocks[i].version->funcs->resume(adev);
2281 if (r) {
2282 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2283 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2284 return r;
2285 }
2286 } else {
2287 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2288 if (r) {
2289 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2290 adev->ip_blocks[i].version->funcs->name, r);
2291 return r;
7a3e0bb2 2292 }
7a3e0bb2 2293 }
482f0e53
ML
2294
2295 adev->ip_blocks[i].status.hw = true;
2296 break;
7a3e0bb2
RZ
2297 }
2298 }
482f0e53 2299
8973d9ec
ED
2300 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2301 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2302
80f41f84 2303 return r;
7a3e0bb2
RZ
2304}
2305
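/*
 * A sketch of how these helpers are sequenced by amdgpu_device_ip_init()
 * further down: firmware loading sits between the two hw_init phases.
 *
 *	amdgpu_device_ip_hw_init_phase1(adev);	// COMMON, IH, PSP on SR-IOV
 *	amdgpu_device_fw_loading(adev);		// PSP resume/init + SMU firmware
 *	amdgpu_device_ip_hw_init_phase2(adev);	// all remaining blocks
 */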
5fd8518d
AG
2306static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2307{
2308 long timeout;
2309 int r, i;
2310
2311 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2312 struct amdgpu_ring *ring = adev->rings[i];
2313
2314 /* No need to setup the GPU scheduler for rings that don't need it */
2315 if (!ring || ring->no_scheduler)
2316 continue;
2317
2318 switch (ring->funcs->type) {
2319 case AMDGPU_RING_TYPE_GFX:
2320 timeout = adev->gfx_timeout;
2321 break;
2322 case AMDGPU_RING_TYPE_COMPUTE:
2323 timeout = adev->compute_timeout;
2324 break;
2325 case AMDGPU_RING_TYPE_SDMA:
2326 timeout = adev->sdma_timeout;
2327 break;
2328 default:
2329 timeout = adev->video_timeout;
2330 break;
2331 }
2332
2333 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
11f25c84 2334 ring->num_hw_submission, 0,
8ab62eda
JG
2335 timeout, adev->reset_domain->wq,
2336 ring->sched_score, ring->name,
2337 adev->dev);
5fd8518d
AG
2338 if (r) {
2339 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2340 ring->name);
2341 return r;
2342 }
2343 }
2344
d425c6f4
JZ
2345 amdgpu_xcp_update_partition_sched_list(adev);
2346
5fd8518d
AG
2347 return 0;
2348}
2349
2350
e3ecdffa
AD
2351/**
2352 * amdgpu_device_ip_init - run init for hardware IPs
2353 *
2354 * @adev: amdgpu_device pointer
2355 *
2356 * Main initialization pass for hardware IPs. The list of all the hardware
2357 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2358 * are run. sw_init initializes the software state associated with each IP
2359 * and hw_init initializes the hardware associated with each IP.
2360 * Returns 0 on success, negative error code on failure.
2361 */
06ec9070 2362static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2363{
2364 int i, r;
2365
c030f2e4 2366 r = amdgpu_ras_init(adev);
2367 if (r)
2368 return r;
2369
d38ceaf9 2370 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2371 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2372 continue;
a1255107 2373 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2374 if (r) {
a1255107
AD
2375 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2376 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2377 goto init_failed;
2c1a2784 2378 }
a1255107 2379 adev->ip_blocks[i].status.sw = true;
bfca0289 2380
c1c39032
AD
2381 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2382 /* need to do common hw init early so everything is set up for gmc */
2383 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2384 if (r) {
2385 DRM_ERROR("hw_init %d failed %d\n", i, r);
2386 goto init_failed;
2387 }
2388 adev->ip_blocks[i].status.hw = true;
2389 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2390 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2391 /* Try to reserve bad pages early */
2392 if (amdgpu_sriov_vf(adev))
2393 amdgpu_virt_exchange_data(adev);
2394
7ccfd79f 2395 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2396 if (r) {
7ccfd79f 2397 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2398 goto init_failed;
2c1a2784 2399 }
a1255107 2400 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2401 if (r) {
2402 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2403 goto init_failed;
2c1a2784 2404 }
06ec9070 2405 r = amdgpu_device_wb_init(adev);
2c1a2784 2406 if (r) {
06ec9070 2407 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2408 goto init_failed;
2c1a2784 2409 }
a1255107 2410 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2411
2412 /* right after GMC hw init, we create CSA */
02ff519e 2413 if (adev->gfx.mcbp) {
1e256e27 2414 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2415 AMDGPU_GEM_DOMAIN_VRAM |
2416 AMDGPU_GEM_DOMAIN_GTT,
2417 AMDGPU_CSA_SIZE);
2493664f
ML
2418 if (r) {
2419 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2420 goto init_failed;
2493664f
ML
2421 }
2422 }
d38ceaf9
AD
2423 }
2424 }
2425
c9ffa427 2426 if (amdgpu_sriov_vf(adev))
22c16d25 2427 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2428
533aed27
AG
2429 r = amdgpu_ib_pool_init(adev);
2430 if (r) {
2431 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2432 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2433 goto init_failed;
2434 }
2435
c8963ea4
RZ
2436 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2437 if (r)
72d3f592 2438 goto init_failed;
0a4f2520
RZ
2439
2440 r = amdgpu_device_ip_hw_init_phase1(adev);
2441 if (r)
72d3f592 2442 goto init_failed;
0a4f2520 2443
7a3e0bb2
RZ
2444 r = amdgpu_device_fw_loading(adev);
2445 if (r)
72d3f592 2446 goto init_failed;
7a3e0bb2 2447
0a4f2520
RZ
2448 r = amdgpu_device_ip_hw_init_phase2(adev);
2449 if (r)
72d3f592 2450 goto init_failed;
d38ceaf9 2451
121a2bc6
AG
2452 /*
2453 * retired pages will be loaded from eeprom and reserved here,
2454 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2455 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2456 * for I2C communication, which is only true at this point.
b82e65a9
GC
2457 *
2458 * amdgpu_ras_recovery_init may fail, but the caller only cares about
2459 * failures caused by a bad gpu situation and stops the amdgpu init process
2460 * accordingly. For other failure cases, it still releases all
2461 * the resources and prints an error message, rather than returning a
2462 * negative value to the upper level.
121a2bc6
AG
2463 *
2464 * Note: theoretically, this should be called before all vram allocations
2465 * to protect retired pages from being reused.
2466 */
b82e65a9
GC
2467 r = amdgpu_ras_recovery_init(adev);
2468 if (r)
2469 goto init_failed;
121a2bc6 2470
cfbb6b00
AG
2471 /**
2472 * In case of XGMI grab extra reference for reset domain for this device
2473 */
a4c63caf 2474 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2475 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2476 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2477 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2478
dfd0287b
LH
2479 if (WARN_ON(!hive)) {
2480 r = -ENOENT;
2481 goto init_failed;
2482 }
2483
46c67660 2484 if (!hive->reset_domain ||
2485 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2486 r = -ENOENT;
2487 amdgpu_put_xgmi_hive(hive);
2488 goto init_failed;
2489 }
2490
2491 /* Drop the early temporary reset domain we created for device */
2492 amdgpu_reset_put_reset_domain(adev->reset_domain);
2493 adev->reset_domain = hive->reset_domain;
9dfa4860 2494 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2495 }
a4c63caf
AG
2496 }
2497 }
2498
5fd8518d
AG
2499 r = amdgpu_device_init_schedulers(adev);
2500 if (r)
2501 goto init_failed;
e3c1b071 2502
2503 /* Don't init kfd if whole hive need to be reset during init */
84b4dd3f
PY
2504 if (!adev->gmc.xgmi.pending_reset) {
2505 kgd2kfd_init_zone_device(adev);
e3c1b071 2506 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2507 }
c6332b97 2508
bd607166
KR
2509 amdgpu_fru_get_product_info(adev);
2510
72d3f592 2511init_failed:
c6332b97 2512
72d3f592 2513 return r;
d38ceaf9
AD
2514}
2515
e3ecdffa
AD
2516/**
2517 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2518 *
2519 * @adev: amdgpu_device pointer
2520 *
2521 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2522 * this function before a GPU reset. If the value is retained after a
2523 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2524 */
06ec9070 2525static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2526{
2527 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2528}
2529
e3ecdffa
AD
2530/**
2531 * amdgpu_device_check_vram_lost - check if vram is valid
2532 *
2533 * @adev: amdgpu_device pointer
2534 *
2535 * Checks the reset magic value written to the gart pointer in VRAM.
2536 * The driver calls this after a GPU reset to see if the contents of
2537 * VRAM have been lost or not.
2538 * returns true if vram is lost, false if not.
2539 */
06ec9070 2540static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2541{
dadce777
EQ
2542 if (memcmp(adev->gart.ptr, adev->reset_magic,
2543 AMDGPU_RESET_MAGIC_NUM))
2544 return true;
2545
53b3f8f4 2546 if (!amdgpu_in_reset(adev))
dadce777
EQ
2547 return false;
2548
2549 /*
2550 * For all ASICs with baco/mode1 reset, the VRAM is
2551 * always assumed to be lost.
2552 */
2553 switch (amdgpu_asic_reset_method(adev)) {
2554 case AMD_RESET_METHOD_BACO:
2555 case AMD_RESET_METHOD_MODE1:
2556 return true;
2557 default:
2558 return false;
2559 }
0c49e0b8
CZ
2560}
2561
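/*
 * A minimal sketch of how the two helpers above pair up around a GPU reset:
 * the magic is recorded before the reset and compared afterwards to decide
 * whether VRAM contents must be treated as lost.
 *
 *	amdgpu_device_fill_reset_magic(adev);
 *	... reset the GPU ...
 *	vram_lost = amdgpu_device_check_vram_lost(adev);
 */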
e3ecdffa 2562/**
1112a46b 2563 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2564 *
2565 * @adev: amdgpu_device pointer
b8b72130 2566 * @state: clockgating state (gate or ungate)
e3ecdffa 2567 *
e3ecdffa 2568 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2569 * set_clockgating_state callbacks are run.
2570 * On late init this pass enables clockgating for the hardware IPs;
2571 * on fini or suspend it disables clockgating.
e3ecdffa
AD
2572 * Returns 0 on success, negative error code on failure.
2573 */
fdd34271 2574
5d89bb2d
LL
2575int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2576 enum amd_clockgating_state state)
d38ceaf9 2577{
1112a46b 2578 int i, j, r;
d38ceaf9 2579
4a2ba394
SL
2580 if (amdgpu_emu_mode == 1)
2581 return 0;
2582
1112a46b
RZ
2583 for (j = 0; j < adev->num_ip_blocks; j++) {
2584 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2585 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2586 continue;
47198eb7 2587 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2588 if (adev->in_s0ix &&
47198eb7
AD
2589 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2590 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2591 continue;
4a446d55 2592 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2593 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2594 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2595 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2596 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2597 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2598 /* enable clockgating to save power */
a1255107 2599 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2600 state);
4a446d55
AD
2601 if (r) {
2602 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2603 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2604 return r;
2605 }
b0b00ff1 2606 }
d38ceaf9 2607 }
06b18f61 2608
c9f96fd5
RZ
2609 return 0;
2610}
2611
5d89bb2d
LL
2612int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2613 enum amd_powergating_state state)
c9f96fd5 2614{
1112a46b 2615 int i, j, r;
06b18f61 2616
c9f96fd5
RZ
2617 if (amdgpu_emu_mode == 1)
2618 return 0;
2619
1112a46b
RZ
2620 for (j = 0; j < adev->num_ip_blocks; j++) {
2621 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2622 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2623 continue;
47198eb7 2624 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2625 if (adev->in_s0ix &&
47198eb7
AD
2626 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2627 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2628 continue;
c9f96fd5
RZ
2629 /* skip CG for VCE/UVD, it's handled specially */
2630 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2631 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2632 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2633 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2634 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2635 /* enable powergating to save power */
2636 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2637 state);
c9f96fd5
RZ
2638 if (r) {
2639 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2640 adev->ip_blocks[i].version->funcs->name, r);
2641 return r;
2642 }
2643 }
2644 }
2dc80b00
S
2645 return 0;
2646}
2647
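/*
 * A minimal sketch of the gate/ungate pairing used elsewhere in this file:
 * late init gates both features to save power, while the fini/suspend paths
 * ungate them again first.
 *
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
 *	...
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
 */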
beff74bc
AD
2648static int amdgpu_device_enable_mgpu_fan_boost(void)
2649{
2650 struct amdgpu_gpu_instance *gpu_ins;
2651 struct amdgpu_device *adev;
2652 int i, ret = 0;
2653
2654 mutex_lock(&mgpu_info.mutex);
2655
2656 /*
2657 * MGPU fan boost feature should be enabled
2658 * only when there are two or more dGPUs in
2659 * the system
2660 */
2661 if (mgpu_info.num_dgpu < 2)
2662 goto out;
2663
2664 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2665 gpu_ins = &(mgpu_info.gpu_ins[i]);
2666 adev = gpu_ins->adev;
2667 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2668 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2669 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2670 if (ret)
2671 break;
2672
2673 gpu_ins->mgpu_fan_enabled = 1;
2674 }
2675 }
2676
2677out:
2678 mutex_unlock(&mgpu_info.mutex);
2679
2680 return ret;
2681}
2682
e3ecdffa
AD
2683/**
2684 * amdgpu_device_ip_late_init - run late init for hardware IPs
2685 *
2686 * @adev: amdgpu_device pointer
2687 *
2688 * Late initialization pass for hardware IPs. The list of all the hardware
2689 * IPs that make up the asic is walked and the late_init callbacks are run.
2690 * late_init covers any special initialization that an IP requires
2691 * after all of them have been initialized or something that needs to happen
2692 * late in the init process.
2693 * Returns 0 on success, negative error code on failure.
2694 */
06ec9070 2695static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2696{
60599a03 2697 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2698 int i = 0, r;
2699
2700 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2701 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2702 continue;
2703 if (adev->ip_blocks[i].version->funcs->late_init) {
2704 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2705 if (r) {
2706 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2707 adev->ip_blocks[i].version->funcs->name, r);
2708 return r;
2709 }
2dc80b00 2710 }
73f847db 2711 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2712 }
2713
867e24ca 2714 r = amdgpu_ras_late_init(adev);
2715 if (r) {
2716 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2717 return r;
2718 }
2719
a891d239
DL
2720 amdgpu_ras_set_error_query_ready(adev, true);
2721
1112a46b
RZ
2722 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2723 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2724
06ec9070 2725 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2726
beff74bc
AD
2727 r = amdgpu_device_enable_mgpu_fan_boost();
2728 if (r)
2729 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2730
4da8b639 2731 /* For passthrough configuration on arcturus and aldebaran, enable special SBR handling */
47fc644f
SS
2732 if (amdgpu_passthrough(adev) &&
2733 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2734 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2735 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2736
2737 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2738 mutex_lock(&mgpu_info.mutex);
2739
2740 /*
2741 * Reset device p-state to low as this was booted with high.
2742 *
2743 * This should be performed only after all devices from the same
2744 * hive get initialized.
2745 *
2746 * However, it's unknown in advance how many devices are in the hive,
2747 * as they are counted one by one during device initialization.
2748 *
2749 * So, we wait for all XGMI interlinked devices initialized.
2750 * This may bring some delays as those devices may come from
2751 * different hives. But that should be OK.
2752 */
2753 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2754 for (i = 0; i < mgpu_info.num_gpu; i++) {
2755 gpu_instance = &(mgpu_info.gpu_ins[i]);
2756 if (gpu_instance->adev->flags & AMD_IS_APU)
2757 continue;
2758
d84a430d
JK
2759 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2760 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2761 if (r) {
2762 DRM_ERROR("pstate setting failed (%d).\n", r);
2763 break;
2764 }
2765 }
2766 }
2767
2768 mutex_unlock(&mgpu_info.mutex);
2769 }
2770
d38ceaf9
AD
2771 return 0;
2772}
2773
613aa3ea
LY
2774/**
2775 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2776 *
2777 * @adev: amdgpu_device pointer
2778 *
2779 * For ASICs that need to disable the SMC first
2780 */
2781static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2782{
2783 int i, r;
2784
2785 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2786 return;
2787
2788 for (i = 0; i < adev->num_ip_blocks; i++) {
2789 if (!adev->ip_blocks[i].status.hw)
2790 continue;
2791 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2792 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2793 /* XXX handle errors */
2794 if (r) {
2795 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2796 adev->ip_blocks[i].version->funcs->name, r);
2797 }
2798 adev->ip_blocks[i].status.hw = false;
2799 break;
2800 }
2801 }
2802}
2803
e9669fb7 2804static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2805{
2806 int i, r;
2807
e9669fb7
AG
2808 for (i = 0; i < adev->num_ip_blocks; i++) {
2809 if (!adev->ip_blocks[i].version->funcs->early_fini)
2810 continue;
5278a159 2811
e9669fb7
AG
2812 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2813 if (r) {
2814 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2815 adev->ip_blocks[i].version->funcs->name, r);
2816 }
2817 }
c030f2e4 2818
05df1f01 2819 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2820 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2821
7270e895
TY
2822 amdgpu_amdkfd_suspend(adev, false);
2823
613aa3ea
LY
2824 /* Workaround for ASICs that need to disable the SMC first */
2825 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2826
d38ceaf9 2827 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2828 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2829 continue;
8201a67a 2830
a1255107 2831 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2832 /* XXX handle errors */
2c1a2784 2833 if (r) {
a1255107
AD
2834 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2835 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2836 }
8201a67a 2837
a1255107 2838 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2839 }
2840
6effad8a
GC
2841 if (amdgpu_sriov_vf(adev)) {
2842 if (amdgpu_virt_release_full_gpu(adev, false))
2843 DRM_ERROR("failed to release exclusive mode on fini\n");
2844 }
2845
e9669fb7
AG
2846 return 0;
2847}
2848
2849/**
2850 * amdgpu_device_ip_fini - run fini for hardware IPs
2851 *
2852 * @adev: amdgpu_device pointer
2853 *
2854 * Main teardown pass for hardware IPs. The list of all the hardware
2855 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2856 * are run. hw_fini tears down the hardware associated with each IP
2857 * and sw_fini tears down any software state associated with each IP.
2858 * Returns 0 on success, negative error code on failure.
2859 */
2860static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2861{
2862 int i, r;
2863
2864 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2865 amdgpu_virt_release_ras_err_handler_data(adev);
2866
e9669fb7
AG
2867 if (adev->gmc.xgmi.num_physical_nodes > 1)
2868 amdgpu_xgmi_remove_device(adev);
2869
c004d44e 2870 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2871
d38ceaf9 2872 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2873 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2874 continue;
c12aba3a
ML
2875
2876 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2877 amdgpu_ucode_free_bo(adev);
1e256e27 2878 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 2879 amdgpu_device_wb_fini(adev);
7ccfd79f 2880 amdgpu_device_mem_scratch_fini(adev);
533aed27 2881 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2882 }
2883
a1255107 2884 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2885 /* XXX handle errors */
2c1a2784 2886 if (r) {
a1255107
AD
2887 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2888 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2889 }
a1255107
AD
2890 adev->ip_blocks[i].status.sw = false;
2891 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2892 }
2893
a6dcfd9c 2894 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2895 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2896 continue;
a1255107
AD
2897 if (adev->ip_blocks[i].version->funcs->late_fini)
2898 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2899 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2900 }
2901
c030f2e4 2902 amdgpu_ras_fini(adev);
2903
d38ceaf9
AD
2904 return 0;
2905}
2906
e3ecdffa 2907/**
beff74bc 2908 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2909 *
1112a46b 2910 * @work: work_struct.
e3ecdffa 2911 */
beff74bc 2912static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2913{
2914 struct amdgpu_device *adev =
beff74bc 2915 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2916 int r;
2917
2918 r = amdgpu_ib_ring_tests(adev);
2919 if (r)
2920 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2921}
2922
1e317b99
RZ
2923static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2924{
2925 struct amdgpu_device *adev =
2926 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2927
90a92662
MD
2928 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2929 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2930
2931 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2932 adev->gfx.gfx_off_state = true;
1e317b99
RZ
2933}
2934
e3ecdffa 2935/**
e7854a03 2936 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2937 *
2938 * @adev: amdgpu_device pointer
2939 *
2940 * Main suspend function for hardware IPs. The list of all the hardware
2941 * IPs that make up the asic is walked, clockgating is disabled and the
2942 * suspend callbacks are run. suspend puts the hardware and software state
2943 * in each IP into a state suitable for suspend.
2944 * Returns 0 on success, negative error code on failure.
2945 */
e7854a03
AD
2946static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2947{
2948 int i, r;
2949
50ec83f0
AD
2950 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2951 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2952
b31d6ada
EQ
2953 /*
2954 * Per PMFW team's suggestion, driver needs to handle gfxoff
2955 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2956 * scenario. Add the missing df cstate disablement here.
2957 */
2958 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2959 dev_warn(adev->dev, "Failed to disallow df cstate");
2960
e7854a03
AD
2961 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2962 if (!adev->ip_blocks[i].status.valid)
2963 continue;
2b9f7848 2964
e7854a03 2965 /* displays are handled separately */
2b9f7848
ND
2966 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2967 continue;
2968
2969 /* XXX handle errors */
2970 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2971 /* XXX handle errors */
2972 if (r) {
2973 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2974 adev->ip_blocks[i].version->funcs->name, r);
2975 return r;
e7854a03 2976 }
2b9f7848
ND
2977
2978 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2979 }
2980
e7854a03
AD
2981 return 0;
2982}
2983
2984/**
2985 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2986 *
2987 * @adev: amdgpu_device pointer
2988 *
2989 * Main suspend function for hardware IPs. The list of all the hardware
2990 * IPs that make up the asic is walked, clockgating is disabled and the
2991 * suspend callbacks are run. suspend puts the hardware and software state
2992 * in each IP into a state suitable for suspend.
2993 * Returns 0 on success, negative error code on failure.
2994 */
2995static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2996{
2997 int i, r;
2998
557f42a2 2999 if (adev->in_s0ix)
bc143d8b 3000 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3001
d38ceaf9 3002 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3003 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3004 continue;
e7854a03
AD
3005 /* displays are handled in phase1 */
3006 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3007 continue;
bff77e86
LM
3008 /* PSP lost connection when err_event_athub occurs */
3009 if (amdgpu_ras_intr_triggered() &&
3010 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3011 adev->ip_blocks[i].status.hw = false;
3012 continue;
3013 }
e3c1b071 3014
3015 /* skip unnecessary suspend if we do not initialize them yet */
3016 if (adev->gmc.xgmi.pending_reset &&
3017 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3018 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3019 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3020 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3021 adev->ip_blocks[i].status.hw = false;
3022 continue;
3023 }
557f42a2 3024
afa6646b 3025 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3026 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3027 * like at runtime. PSP is also part of the always on hardware
3028 * so no need to suspend it.
3029 */
557f42a2 3030 if (adev->in_s0ix &&
32ff160d 3031 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3032 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3033 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3034 continue;
3035
2a7798ea
AD
3036 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3037 if (adev->in_s0ix &&
3038 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3039 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3040 continue;
3041
e11c7750
TH
3042 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3043 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3044 * from this location and RLC Autoload automatically also gets loaded
3045 * from here based on PMFW -> PSP message during re-init sequence.
3046 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3047 * the TMR and reload FWs again for IMU enabled APU ASICs.
3048 */
3049 if (amdgpu_in_reset(adev) &&
3050 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3051 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3052 continue;
3053
d38ceaf9 3054 /* XXX handle errors */
a1255107 3055 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3056 /* XXX handle errors */
2c1a2784 3057 if (r) {
a1255107
AD
3058 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3059 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3060 }
876923fb 3061 adev->ip_blocks[i].status.hw = false;
a3a09142 3062 /* handle putting the SMC in the appropriate state */
47fc644f 3063 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3064 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3065 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3066 if (r) {
3067 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3068 adev->mp1_state, r);
3069 return r;
3070 }
a3a09142
AD
3071 }
3072 }
d38ceaf9
AD
3073 }
3074
3075 return 0;
3076}
3077
e7854a03
AD
3078/**
3079 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3080 *
3081 * @adev: amdgpu_device pointer
3082 *
3083 * Main suspend function for hardware IPs. The list of all the hardware
3084 * IPs that make up the asic is walked, clockgating is disabled and the
3085 * suspend callbacks are run. suspend puts the hardware and software state
3086 * in each IP into a state suitable for suspend.
3087 * Returns 0 on success, negative error code on failure.
3088 */
3089int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3090{
3091 int r;
3092
3c73683c
JC
3093 if (amdgpu_sriov_vf(adev)) {
3094 amdgpu_virt_fini_data_exchange(adev);
e7819644 3095 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3096 }
e7819644 3097
e7854a03
AD
3098 r = amdgpu_device_ip_suspend_phase1(adev);
3099 if (r)
3100 return r;
3101 r = amdgpu_device_ip_suspend_phase2(adev);
3102
e7819644
YT
3103 if (amdgpu_sriov_vf(adev))
3104 amdgpu_virt_release_full_gpu(adev, false);
3105
e7854a03
AD
3106 return r;
3107}
3108
06ec9070 3109static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3110{
3111 int i, r;
3112
2cb681b6 3113 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3114 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3115 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3116 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3117 AMD_IP_BLOCK_TYPE_IH,
3118 };
a90ad3c2 3119
95ea3dbc 3120 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3121 int j;
3122 struct amdgpu_ip_block *block;
a90ad3c2 3123
4cd2a96d
J
3124 block = &adev->ip_blocks[i];
3125 block->status.hw = false;
2cb681b6 3126
4cd2a96d 3127 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3128
4cd2a96d 3129 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3130 !block->status.valid)
3131 continue;
3132
3133 r = block->version->funcs->hw_init(adev);
0aaeefcc 3134 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3135 if (r)
3136 return r;
482f0e53 3137 block->status.hw = true;
a90ad3c2
ML
3138 }
3139 }
3140
3141 return 0;
3142}
3143
06ec9070 3144static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3145{
3146 int i, r;
3147
2cb681b6
ML
3148 static enum amd_ip_block_type ip_order[] = {
3149 AMD_IP_BLOCK_TYPE_SMC,
3150 AMD_IP_BLOCK_TYPE_DCE,
3151 AMD_IP_BLOCK_TYPE_GFX,
3152 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3153 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3154 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3155 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3156 AMD_IP_BLOCK_TYPE_VCN,
3157 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3158 };
a90ad3c2 3159
2cb681b6
ML
3160 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3161 int j;
3162 struct amdgpu_ip_block *block;
a90ad3c2 3163
2cb681b6
ML
3164 for (j = 0; j < adev->num_ip_blocks; j++) {
3165 block = &adev->ip_blocks[j];
3166
3167 if (block->version->type != ip_order[i] ||
482f0e53
ML
3168 !block->status.valid ||
3169 block->status.hw)
2cb681b6
ML
3170 continue;
3171
895bd048
JZ
3172 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3173 r = block->version->funcs->resume(adev);
3174 else
3175 r = block->version->funcs->hw_init(adev);
3176
0aaeefcc 3177 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3178 if (r)
3179 return r;
482f0e53 3180 block->status.hw = true;
a90ad3c2
ML
3181 }
3182 }
3183
3184 return 0;
3185}
3186
e3ecdffa
AD
3187/**
3188 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3189 *
3190 * @adev: amdgpu_device pointer
3191 *
3192 * First resume function for hardware IPs. The list of all the hardware
3193 * IPs that make up the asic is walked and the resume callbacks are run for
3194 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3195 * after a suspend and updates the software state as necessary. This
3196 * function is also used for restoring the GPU after a GPU reset.
3197 * Returns 0 on success, negative error code on failure.
3198 */
06ec9070 3199static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3200{
3201 int i, r;
3202
a90ad3c2 3203 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3204 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3205 continue;
a90ad3c2 3206 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3207 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3208 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3209 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3210
fcf0649f
CZ
3211 r = adev->ip_blocks[i].version->funcs->resume(adev);
3212 if (r) {
3213 DRM_ERROR("resume of IP block <%s> failed %d\n",
3214 adev->ip_blocks[i].version->funcs->name, r);
3215 return r;
3216 }
482f0e53 3217 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3218 }
3219 }
3220
3221 return 0;
3222}
3223
e3ecdffa
AD
3224/**
3225 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3226 *
3227 * @adev: amdgpu_device pointer
3228 *
3229 * Second resume function for hardware IPs. The list of all the hardware
3230 * IPs that make up the asic is walked and the resume callbacks are run for
3231 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3232 * functional state after a suspend and updates the software state as
3233 * necessary. This function is also used for restoring the GPU after a GPU
3234 * reset.
3235 * Returns 0 on success, negative error code on failure.
3236 */
06ec9070 3237static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3238{
3239 int i, r;
3240
3241 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3242 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3243 continue;
fcf0649f 3244 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3245 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3246 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3247 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3248 continue;
a1255107 3249 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3250 if (r) {
a1255107
AD
3251 DRM_ERROR("resume of IP block <%s> failed %d\n",
3252 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3253 return r;
2c1a2784 3254 }
482f0e53 3255 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3256 }
3257
3258 return 0;
3259}
3260
e3ecdffa
AD
3261/**
3262 * amdgpu_device_ip_resume - run resume for hardware IPs
3263 *
3264 * @adev: amdgpu_device pointer
3265 *
3266 * Main resume function for hardware IPs. The hardware IPs
3267 * are split into two resume functions because they are
3268 * also used in recovering from a GPU reset and some additional
3269 * steps need to be taken between them. In this case (S3/S4) they are
3270 * run sequentially.
3271 * Returns 0 on success, negative error code on failure.
3272 */
06ec9070 3273static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3274{
3275 int r;
3276
f2206741
AL
3277 if (!adev->in_s0ix) {
3278 r = amdgpu_amdkfd_resume_iommu(adev);
3279 if (r)
3280 return r;
3281 }
9cec53c1 3282
06ec9070 3283 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3284 if (r)
3285 return r;
7a3e0bb2
RZ
3286
3287 r = amdgpu_device_fw_loading(adev);
3288 if (r)
3289 return r;
3290
06ec9070 3291 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3292
3293 return r;
3294}
3295
e3ecdffa
AD
3296/**
3297 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3298 *
3299 * @adev: amdgpu_device pointer
3300 *
3301 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3302 */
4e99a44e 3303static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3304{
6867e1b5
ML
3305 if (amdgpu_sriov_vf(adev)) {
3306 if (adev->is_atom_fw) {
58ff791a 3307 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3308 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3309 } else {
3310 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3311 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3312 }
3313
3314 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3315 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3316 }
048765ad
AR
3317}
3318
e3ecdffa
AD
3319/**
3320 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3321 *
3322 * @asic_type: AMD asic type
3323 *
3324 * Check if there is DC (new modesetting infrastructure) support for an asic.
3325 * returns true if DC has support, false if not.
3326 */
4562236b
HW
3327bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3328{
3329 switch (asic_type) {
0637d417
AD
3330#ifdef CONFIG_DRM_AMDGPU_SI
3331 case CHIP_HAINAN:
3332#endif
3333 case CHIP_TOPAZ:
3334 /* chips with no display hardware */
3335 return false;
4562236b 3336#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3337 case CHIP_TAHITI:
3338 case CHIP_PITCAIRN:
3339 case CHIP_VERDE:
3340 case CHIP_OLAND:
2d32ffd6
AD
3341 /*
3342 * We have systems in the wild with these ASICs that require
3343 * LVDS and VGA support which is not supported with DC.
3344 *
3345 * Fallback to the non-DC driver here by default so as not to
3346 * cause regressions.
3347 */
3348#if defined(CONFIG_DRM_AMD_DC_SI)
3349 return amdgpu_dc > 0;
3350#else
3351 return false;
64200c46 3352#endif
4562236b 3353 case CHIP_BONAIRE:
0d6fbccb 3354 case CHIP_KAVERI:
367e6687
AD
3355 case CHIP_KABINI:
3356 case CHIP_MULLINS:
d9fda248
HW
3357 /*
3358 * We have systems in the wild with these ASICs that require
b5a0168e 3359 * VGA support which is not supported with DC.
d9fda248
HW
3360 *
3361 * Fallback to the non-DC driver here by default so as not to
3362 * cause regressions.
3363 */
3364 return amdgpu_dc > 0;
f7f12b25 3365 default:
fd187853 3366 return amdgpu_dc != 0;
f7f12b25 3367#else
4562236b 3368 default:
93b09a9a 3369 if (amdgpu_dc > 0)
044a48f4 3370 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3371 "but isn't supported by ASIC, ignoring\n");
4562236b 3372 return false;
f7f12b25 3373#endif
4562236b
HW
3374 }
3375}
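Which path is taken on the legacy ASICs above is governed by the amdgpu_dc module parameter checked in this switch; assuming the usual amdgpu.dc semantics (-1 auto, 0 legacy path, 1 force DC), a board from the fallback list can still opt in, e.g.:

	modprobe amdgpu dc=1    # force the DC display path on an SI/CIK board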
3376
3377/**
3378 * amdgpu_device_has_dc_support - check if dc is supported
3379 *
982a820b 3380 * @adev: amdgpu_device pointer
4562236b
HW
3381 *
3382 * Returns true for supported, false for not supported
3383 */
3384bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3385{
25263da3 3386 if (adev->enable_virtual_display ||
abaf210c 3387 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3388 return false;
3389
4562236b
HW
3390 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3391}
3392
d4535e2c
AG
3393static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3394{
3395 struct amdgpu_device *adev =
3396 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3397 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3398
c6a6e2db
AG
3399 /* It's a bug to not have a hive within this function */
3400 if (WARN_ON(!hive))
3401 return;
3402
3403 /*
3404 * Use task barrier to synchronize all xgmi reset works across the
3405 * hive. task_barrier_enter and task_barrier_exit will block
3406 * until all the threads running the xgmi reset works reach
3407 * those points. task_barrier_full will do both blocks.
3408 */
3409 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3410
3411 task_barrier_enter(&hive->tb);
4a580877 3412 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3413
3414 if (adev->asic_reset_res)
3415 goto fail;
3416
3417 task_barrier_exit(&hive->tb);
4a580877 3418 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3419
3420 if (adev->asic_reset_res)
3421 goto fail;
43c4d576 3422
5e67bba3 3423 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3424 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3425 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3426 } else {
3427
3428 task_barrier_full(&hive->tb);
3429 adev->asic_reset_res = amdgpu_asic_reset(adev);
3430 }
ce316fa5 3431
c6a6e2db 3432fail:
d4535e2c 3433 if (adev->asic_reset_res)
fed184e9 3434 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3435 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3436 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3437}
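This handler only ever runs as a work item; a minimal sketch of how it is wired up, mirroring the INIT_WORK()/queue_work() calls that appear elsewhere in this file:

	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
	/* ... later, during a hive-wide reset, one item is queued per node: */
	if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
		r = -EALREADY;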
3438
71f98027
AD
3439static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3440{
3441 char *input = amdgpu_lockup_timeout;
3442 char *timeout_setting = NULL;
3443 int index = 0;
3444 long timeout;
3445 int ret = 0;
3446
3447 /*
67387dfe
AD
 3448 * By default the timeout for non-compute jobs is 10000
 3449 * and 60000 for compute jobs.
71f98027 3450 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3451 * jobs is 60000 by default.
71f98027
AD
3452 */
3453 adev->gfx_timeout = msecs_to_jiffies(10000);
3454 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3455 if (amdgpu_sriov_vf(adev))
3456 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3457 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3458 else
67387dfe 3459 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3460
f440ff44 3461 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3462 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3463 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3464 ret = kstrtol(timeout_setting, 0, &timeout);
3465 if (ret)
3466 return ret;
3467
3468 if (timeout == 0) {
3469 index++;
3470 continue;
3471 } else if (timeout < 0) {
3472 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3473 dev_warn(adev->dev, "lockup timeout disabled");
3474 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3475 } else {
3476 timeout = msecs_to_jiffies(timeout);
3477 }
3478
3479 switch (index++) {
3480 case 0:
3481 adev->gfx_timeout = timeout;
3482 break;
3483 case 1:
3484 adev->compute_timeout = timeout;
3485 break;
3486 case 2:
3487 adev->sdma_timeout = timeout;
3488 break;
3489 case 3:
3490 adev->video_timeout = timeout;
3491 break;
3492 default:
3493 break;
3494 }
3495 }
3496 /*
3497 * There is only one value specified and
3498 * it should apply to all non-compute jobs.
3499 */
bcccee89 3500 if (index == 1) {
71f98027 3501 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3502 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3503 adev->compute_timeout = adev->gfx_timeout;
3504 }
71f98027
AD
3505 }
3506
3507 return ret;
3508}
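Per the index switch above, a comma-separated lockup_timeout string maps in order to gfx, compute, sdma and video; 0 keeps the built-in default and a negative value disables the timeout. A hedged example, assuming the standard amdgpu.lockup_timeout module parameter spelling:

	modprobe amdgpu lockup_timeout=10000,60000,10000,10000
	# gfx=10s, compute=60s, sdma=10s, video=10s; a single value applies to all
	# non-compute jobs (and also to compute jobs under SR-IOV/passthrough)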
d4535e2c 3509
4a74c38c
PY
3510/**
3511 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3512 *
3513 * @adev: amdgpu_device pointer
3514 *
3515 * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3516 */
3517static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3518{
3519 struct iommu_domain *domain;
3520
3521 domain = iommu_get_domain_for_dev(adev->dev);
3522 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3523 adev->ram_is_direct_mapped = true;
3524}
3525
77f3a5cd
ND
3526static const struct attribute *amdgpu_dev_attributes[] = {
3527 &dev_attr_product_name.attr,
3528 &dev_attr_product_number.attr,
3529 &dev_attr_serial_number.attr,
3530 &dev_attr_pcie_replay_count.attr,
3531 NULL
3532};
3533
02ff519e
AD
3534static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3535{
3536 if (amdgpu_mcbp == 1)
3537 adev->gfx.mcbp = true;
3538
50a7c876
AD
3539 if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
3540 (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
3541 adev->gfx.num_gfx_rings)
3542 adev->gfx.mcbp = true;
3543
02ff519e
AD
3544 if (amdgpu_sriov_vf(adev))
3545 adev->gfx.mcbp = true;
3546
3547 if (adev->gfx.mcbp)
3548 DRM_INFO("MCBP is enabled\n");
3549}
3550
d38ceaf9
AD
3551/**
3552 * amdgpu_device_init - initialize the driver
3553 *
3554 * @adev: amdgpu_device pointer
d38ceaf9
AD
3555 * @flags: driver flags
3556 *
3557 * Initializes the driver info and hw (all asics).
3558 * Returns 0 for success or an error on failure.
3559 * Called at driver startup.
3560 */
3561int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3562 uint32_t flags)
3563{
8aba21b7
LT
3564 struct drm_device *ddev = adev_to_drm(adev);
3565 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3566 int r, i;
b98c6299 3567 bool px = false;
95844d20 3568 u32 max_MBps;
59e9fff1 3569 int tmp;
d38ceaf9
AD
3570
3571 adev->shutdown = false;
d38ceaf9 3572 adev->flags = flags;
4e66d7d2
YZ
3573
3574 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3575 adev->asic_type = amdgpu_force_asic_type;
3576 else
3577 adev->asic_type = flags & AMD_ASIC_MASK;
3578
d38ceaf9 3579 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3580 if (amdgpu_emu_mode == 1)
8bdab6bb 3581 adev->usec_timeout *= 10;
770d13b1 3582 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3583 adev->accel_working = false;
3584 adev->num_rings = 0;
68ce8b24 3585 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3586 adev->mman.buffer_funcs = NULL;
3587 adev->mman.buffer_funcs_ring = NULL;
3588 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3589 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3590 adev->gmc.gmc_funcs = NULL;
7bd939d0 3591 adev->harvest_ip_mask = 0x0;
f54d1867 3592 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3593 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3594
3595 adev->smc_rreg = &amdgpu_invalid_rreg;
3596 adev->smc_wreg = &amdgpu_invalid_wreg;
3597 adev->pcie_rreg = &amdgpu_invalid_rreg;
3598 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3599 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3600 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3601 adev->pciep_rreg = &amdgpu_invalid_rreg;
3602 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3603 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3604 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3605 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3606 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3607 adev->didt_rreg = &amdgpu_invalid_rreg;
3608 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3609 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3610 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3611 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3612 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3613
3e39ab90
AD
3614 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3615 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3616 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3617
 3618 /* mutex initialization is all done here so we
 3619 * can call these functions again later without locking issues */
0e5ca0d1 3620 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3621 mutex_init(&adev->pm.mutex);
3622 mutex_init(&adev->gfx.gpu_clock_mutex);
3623 mutex_init(&adev->srbm_mutex);
b8866c26 3624 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3625 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3626 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3627 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3628 mutex_init(&adev->mn_lock);
e23b74aa 3629 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3630 hash_init(adev->mn_hash);
32eaeae0 3631 mutex_init(&adev->psp.mutex);
bd052211 3632 mutex_init(&adev->notifier_lock);
8cda7a4f 3633 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3634 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3635
ab3b9de6 3636 amdgpu_device_init_apu_flags(adev);
9f6a7857 3637
912dfc84
EQ
3638 r = amdgpu_device_check_arguments(adev);
3639 if (r)
3640 return r;
d38ceaf9 3641
d38ceaf9
AD
3642 spin_lock_init(&adev->mmio_idx_lock);
3643 spin_lock_init(&adev->smc_idx_lock);
3644 spin_lock_init(&adev->pcie_idx_lock);
3645 spin_lock_init(&adev->uvd_ctx_idx_lock);
3646 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3647 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3648 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3649 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3650 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3651
0c4e7fa5
CZ
3652 INIT_LIST_HEAD(&adev->shadow_list);
3653 mutex_init(&adev->shadow_list_lock);
3654
655ce9cb 3655 INIT_LIST_HEAD(&adev->reset_list);
3656
6492e1b0 3657 INIT_LIST_HEAD(&adev->ras_list);
3658
beff74bc
AD
3659 INIT_DELAYED_WORK(&adev->delayed_init_work,
3660 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3661 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3662 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3663
d4535e2c
AG
3664 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3665
d23ee13f 3666 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3667 adev->gfx.gfx_off_residency = 0;
3668 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3669 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3670
b265bdbd
EQ
3671 atomic_set(&adev->throttling_logging_enabled, 1);
3672 /*
3673 * If throttling continues, logging will be performed every minute
3674 * to avoid log flooding. "-1" is subtracted since the thermal
3675 * throttling interrupt comes every second. Thus, the total logging
 3676 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3677 * for throttling interrupt) = 60 seconds.
3678 */
3679 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3680 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3681
0fa49558
AX
3682 /* Registers mapping */
3683 /* TODO: block userspace mapping of io register */
da69c161
KW
3684 if (adev->asic_type >= CHIP_BONAIRE) {
3685 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3686 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3687 } else {
3688 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3689 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3690 }
d38ceaf9 3691
6c08e0ef
EQ
3692 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3693 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3694
d38ceaf9
AD
3695 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3696 if (adev->rmmio == NULL) {
3697 return -ENOMEM;
3698 }
3699 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3700 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3701
436afdfa
PY
3702 /*
 3703 * Reset domain needs to be present early, before the XGMI hive (if any) is
 3704 * discovered and initialized, so that the reset sem and in_gpu reset flag can be
 3705 * used early on during init and before calling RREG32.
3706 */
3707 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3708 if (!adev->reset_domain)
3709 return -ENOMEM;
3710
3aa0115d
ML
3711 /* detect hw virtualization here */
3712 amdgpu_detect_virtualization(adev);
3713
04e85958
TL
3714 amdgpu_device_get_pcie_info(adev);
3715
dffa11b4
ML
3716 r = amdgpu_device_get_job_timeout_settings(adev);
3717 if (r) {
3718 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3719 return r;
a190d1c7
XY
3720 }
3721
d38ceaf9 3722 /* early init functions */
06ec9070 3723 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3724 if (r)
4ef87d8f 3725 return r;
d38ceaf9 3726
02ff519e
AD
3727 amdgpu_device_set_mcbp(adev);
3728
b7cdb41e
ML
3729 /* Get rid of things like offb */
3730 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3731 if (r)
3732 return r;
3733
4d33e704
SK
3734 /* Enable TMZ based on IP_VERSION */
3735 amdgpu_gmc_tmz_set(adev);
3736
957b0787 3737 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
 3738 /* Need to get xgmi info early to decide the reset behavior */
3739 if (adev->gmc.xgmi.supported) {
3740 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3741 if (r)
3742 return r;
3743 }
3744
8e6d0b69 3745 /* enable PCIE atomic ops */
b4520bfd
GW
3746 if (amdgpu_sriov_vf(adev)) {
3747 if (adev->virt.fw_reserve.p_pf2vf)
3748 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3749 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3750 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
 3751 /* APUs w/ gfx9 onwards don't rely on PCIe atomics; rather, their
 3752 * internal path natively supports atomics, so set have_atomics_support to true.
3753 */
b4520bfd
GW
3754 } else if ((adev->flags & AMD_IS_APU) &&
3755 (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
0e768043 3756 adev->have_atomics_support = true;
b4520bfd 3757 } else {
8e6d0b69 3758 adev->have_atomics_support =
3759 !pci_enable_atomic_ops_to_root(adev->pdev,
3760 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3761 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
3762 }
3763
8e6d0b69 3764 if (!adev->have_atomics_support)
3765 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3766
6585661d 3767 /* doorbell bar mapping and doorbell index init*/
43c064db 3768 amdgpu_doorbell_init(adev);
6585661d 3769
9475a943
SL
3770 if (amdgpu_emu_mode == 1) {
3771 /* post the asic on emulation mode */
3772 emu_soc_asic_init(adev);
bfca0289 3773 goto fence_driver_init;
9475a943 3774 }
bfca0289 3775
04442bf7
LL
3776 amdgpu_reset_init(adev);
3777
4e99a44e 3778 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
3779 if (adev->bios)
3780 amdgpu_device_detect_sriov_bios(adev);
048765ad 3781
95e8e59e
AD
3782 /* check if we need to reset the asic
3783 * E.g., driver was not cleanly unloaded previously, etc.
3784 */
f14899fd 3785 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3786 if (adev->gmc.xgmi.num_physical_nodes) {
3787 dev_info(adev->dev, "Pending hive reset.\n");
3788 adev->gmc.xgmi.pending_reset = true;
3789 /* Only need to init necessary block for SMU to handle the reset */
3790 for (i = 0; i < adev->num_ip_blocks; i++) {
3791 if (!adev->ip_blocks[i].status.valid)
3792 continue;
3793 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3794 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3795 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3796 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3797 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3798 adev->ip_blocks[i].version->funcs->name);
3799 adev->ip_blocks[i].status.hw = true;
3800 }
3801 }
3802 } else {
59e9fff1 3803 tmp = amdgpu_reset_method;
3804 /* It should do a default reset when loading or reloading the driver,
3805 * regardless of the module parameter reset_method.
3806 */
3807 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3808 r = amdgpu_asic_reset(adev);
59e9fff1 3809 amdgpu_reset_method = tmp;
e3c1b071 3810 if (r) {
3811 dev_err(adev->dev, "asic reset on init failed\n");
3812 goto failed;
3813 }
95e8e59e
AD
3814 }
3815 }
3816
d38ceaf9 3817 /* Post card if necessary */
39c640c0 3818 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3819 if (!adev->bios) {
bec86378 3820 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3821 r = -EINVAL;
3822 goto failed;
d38ceaf9 3823 }
bec86378 3824 DRM_INFO("GPU posting now...\n");
4d2997ab 3825 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3826 if (r) {
3827 dev_err(adev->dev, "gpu post error!\n");
3828 goto failed;
3829 }
d38ceaf9
AD
3830 }
3831
9535a86a
SZ
3832 if (adev->bios) {
3833 if (adev->is_atom_fw) {
3834 /* Initialize clocks */
3835 r = amdgpu_atomfirmware_get_clock_info(adev);
3836 if (r) {
3837 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3838 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3839 goto failed;
3840 }
3841 } else {
3842 /* Initialize clocks */
3843 r = amdgpu_atombios_get_clock_info(adev);
3844 if (r) {
3845 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3846 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3847 goto failed;
3848 }
3849 /* init i2c buses */
3850 if (!amdgpu_device_has_dc_support(adev))
3851 amdgpu_atombios_i2c_init(adev);
a5bde2f9 3852 }
2c1a2784 3853 }
d38ceaf9 3854
bfca0289 3855fence_driver_init:
d38ceaf9 3856 /* Fence driver */
067f44c8 3857 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3858 if (r) {
067f44c8 3859 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3860 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3861 goto failed;
2c1a2784 3862 }
d38ceaf9
AD
3863
3864 /* init the mode config */
4a580877 3865 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3866
06ec9070 3867 r = amdgpu_device_ip_init(adev);
d38ceaf9 3868 if (r) {
06ec9070 3869 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3870 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3871 goto release_ras_con;
d38ceaf9
AD
3872 }
3873
8d35a259
LG
3874 amdgpu_fence_driver_hw_init(adev);
3875
d69b8971
YZ
3876 dev_info(adev->dev,
3877 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3878 adev->gfx.config.max_shader_engines,
3879 adev->gfx.config.max_sh_per_se,
3880 adev->gfx.config.max_cu_per_sh,
3881 adev->gfx.cu_info.number);
3882
d38ceaf9
AD
3883 adev->accel_working = true;
3884
e59c0205
AX
3885 amdgpu_vm_check_compute_bug(adev);
3886
95844d20
MO
3887 /* Initialize the buffer migration limit. */
3888 if (amdgpu_moverate >= 0)
3889 max_MBps = amdgpu_moverate;
3890 else
3891 max_MBps = 8; /* Allow 8 MB/s. */
3892 /* Get a log2 for easy divisions. */
3893 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3894
184d8384
LL
3895 r = amdgpu_atombios_sysfs_init(adev);
3896 if (r)
3897 drm_err(&adev->ddev,
3898 "registering atombios sysfs failed (%d).\n", r);
3899
d2f52ac8 3900 r = amdgpu_pm_sysfs_init(adev);
53e9d836
GC
3901 if (r)
3902 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
d2f52ac8 3903
5bb23532 3904 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3905 if (r) {
3906 adev->ucode_sysfs_en = false;
5bb23532 3907 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3908 } else
3909 adev->ucode_sysfs_en = true;
5bb23532 3910
b0adca4d
EQ
3911 /*
3912 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 3913 * Otherwise the mgpu fan boost feature will be skipped because the
 3914 * gpu instance count would be too low.
3915 */
3916 amdgpu_register_gpu_instance(adev);
3917
d38ceaf9
AD
3918 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3919 * explicit gating rather than handling it automatically.
3920 */
e3c1b071 3921 if (!adev->gmc.xgmi.pending_reset) {
3922 r = amdgpu_device_ip_late_init(adev);
3923 if (r) {
3924 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3925 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3926 goto release_ras_con;
e3c1b071 3927 }
3928 /* must succeed. */
3929 amdgpu_ras_resume(adev);
3930 queue_delayed_work(system_wq, &adev->delayed_init_work,
3931 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3932 }
d38ceaf9 3933
38eecbe0
CL
3934 if (amdgpu_sriov_vf(adev)) {
3935 amdgpu_virt_release_full_gpu(adev, true);
2c738637 3936 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 3937 }
2c738637 3938
77f3a5cd 3939 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3940 if (r)
77f3a5cd 3941 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3942
d155bef0
AB
3943 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3944 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3945 if (r)
3946 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3947
c1dd4aa6
AG
3948 /* Have stored pci confspace at hand for restore in sudden PCI error */
3949 if (amdgpu_device_cache_pci_state(adev->pdev))
3950 pci_restore_state(pdev);
3951
8c3dd61c
KHF
3952 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3953 /* this will fail for cards that aren't VGA class devices, just
3954 * ignore it */
3955 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 3956 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 3957
d37a3929
OC
3958 px = amdgpu_device_supports_px(ddev);
3959
3960 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
3961 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
3962 vga_switcheroo_register_client(adev->pdev,
3963 &amdgpu_switcheroo_ops, px);
d37a3929
OC
3964
3965 if (px)
8c3dd61c 3966 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 3967
e3c1b071 3968 if (adev->gmc.xgmi.pending_reset)
3969 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3970 msecs_to_jiffies(AMDGPU_RESUME_MS));
3971
4a74c38c
PY
3972 amdgpu_device_check_iommu_direct_map(adev);
3973
d38ceaf9 3974 return 0;
83ba126a 3975
970fd197 3976release_ras_con:
38eecbe0
CL
3977 if (amdgpu_sriov_vf(adev))
3978 amdgpu_virt_release_full_gpu(adev, true);
3979
3980 /* failed in exclusive mode due to timeout */
3981 if (amdgpu_sriov_vf(adev) &&
3982 !amdgpu_sriov_runtime(adev) &&
3983 amdgpu_virt_mmio_blocked(adev) &&
3984 !amdgpu_virt_wait_reset(adev)) {
3985 dev_err(adev->dev, "VF exclusive mode timeout\n");
3986 /* Don't send request since VF is inactive. */
3987 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3988 adev->virt.ops = NULL;
3989 r = -EAGAIN;
3990 }
970fd197
SY
3991 amdgpu_release_ras_context(adev);
3992
83ba126a 3993failed:
89041940 3994 amdgpu_vf_error_trans_all(adev);
8840a387 3995
83ba126a 3996 return r;
d38ceaf9
AD
3997}
3998
07775fc1
AG
3999static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4000{
62d5f9f7 4001
07775fc1
AG
4002 /* Clear all CPU mappings pointing to this device */
4003 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4004
4005 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 4006 amdgpu_doorbell_fini(adev);
07775fc1
AG
4007
4008 iounmap(adev->rmmio);
4009 adev->rmmio = NULL;
4010 if (adev->mman.aper_base_kaddr)
4011 iounmap(adev->mman.aper_base_kaddr);
4012 adev->mman.aper_base_kaddr = NULL;
4013
4014 /* Memory manager related */
a0ba1279 4015 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4016 arch_phys_wc_del(adev->gmc.vram_mtrr);
4017 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4018 }
4019}
4020
d38ceaf9 4021/**
bbe04dec 4022 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4023 *
4024 * @adev: amdgpu_device pointer
4025 *
4026 * Tear down the driver info (all asics).
4027 * Called at driver shutdown.
4028 */
72c8c97b 4029void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4030{
aac89168 4031 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4032 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4033 adev->shutdown = true;
9f875167 4034
752c683d
ML
 4035 /* make sure the IB test has finished before entering exclusive mode
 4036 * to avoid preempting the IB test
 4037 */
519b8b76 4038 if (amdgpu_sriov_vf(adev)) {
752c683d 4039 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4040 amdgpu_virt_fini_data_exchange(adev);
4041 }
752c683d 4042
e5b03032
ML
4043 /* disable all interrupts */
4044 amdgpu_irq_disable_all(adev);
47fc644f 4045 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4046 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4047 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4048 else
4a580877 4049 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4050 }
8d35a259 4051 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4052
cd3a8a59 4053 if (adev->mman.initialized)
9bff18d1 4054 drain_workqueue(adev->mman.bdev.wq);
98f56188 4055
53e9d836 4056 if (adev->pm.sysfs_initialized)
7c868b59 4057 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4058 if (adev->ucode_sysfs_en)
4059 amdgpu_ucode_sysfs_fini(adev);
4060 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4061
232d1d43
SY
4062 /* disable ras feature must before hw fini */
4063 amdgpu_ras_pre_fini(adev);
4064
e9669fb7 4065 amdgpu_device_ip_fini_early(adev);
d10d0daa 4066
a3848df6
YW
4067 amdgpu_irq_fini_hw(adev);
4068
b6fd6e0f
SK
4069 if (adev->mman.initialized)
4070 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4071
d10d0daa 4072 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4073
39934d3e
VP
4074 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4075 amdgpu_device_unmap_mmio(adev);
87172e89 4076
72c8c97b
AG
4077}
4078
4079void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4080{
62d5f9f7 4081 int idx;
d37a3929 4082 bool px;
62d5f9f7 4083
8d35a259 4084 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4085 amdgpu_device_ip_fini(adev);
b31d3063 4086 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4087 adev->accel_working = false;
68ce8b24 4088 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4089
4090 amdgpu_reset_fini(adev);
4091
d38ceaf9 4092 /* free i2c buses */
4562236b
HW
4093 if (!amdgpu_device_has_dc_support(adev))
4094 amdgpu_i2c_fini(adev);
bfca0289
SL
4095
4096 if (amdgpu_emu_mode != 1)
4097 amdgpu_atombios_fini(adev);
4098
d38ceaf9
AD
4099 kfree(adev->bios);
4100 adev->bios = NULL;
d37a3929
OC
4101
4102 px = amdgpu_device_supports_px(adev_to_drm(adev));
4103
4104 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4105 apple_gmux_detect(NULL, NULL)))
84c8b22e 4106 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4107
4108 if (px)
83ba126a 4109 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4110
38d6be81 4111 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4112 vga_client_unregister(adev->pdev);
e9bc1bf7 4113
62d5f9f7
LS
4114 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4115
4116 iounmap(adev->rmmio);
4117 adev->rmmio = NULL;
43c064db 4118 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4119 drm_dev_exit(idx);
4120 }
4121
d155bef0
AB
4122 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4123 amdgpu_pmu_fini(adev);
72de33f8 4124 if (adev->mman.discovery_bin)
a190d1c7 4125 amdgpu_discovery_fini(adev);
72c8c97b 4126
cfbb6b00
AG
4127 amdgpu_reset_put_reset_domain(adev->reset_domain);
4128 adev->reset_domain = NULL;
4129
72c8c97b
AG
4130 kfree(adev->pci_state);
4131
d38ceaf9
AD
4132}
4133
58144d28
ND
4134/**
4135 * amdgpu_device_evict_resources - evict device resources
4136 * @adev: amdgpu device object
4137 *
 4138 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4139 * of the vram memory type. Mainly used for evicting device resources
4140 * at suspend time.
4141 *
4142 */
7863c155 4143static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4144{
7863c155
ML
4145 int ret;
4146
e53d9665
ML
4147 /* No need to evict vram on APUs for suspend to ram or s2idle */
4148 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4149 return 0;
58144d28 4150
7863c155
ML
4151 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4152 if (ret)
58144d28 4153 DRM_WARN("evicting device resources failed\n");
7863c155 4154 return ret;
58144d28 4155}
d38ceaf9
AD
4156
4157/*
4158 * Suspend & resume.
4159 */
4160/**
810ddc3a 4161 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4162 *
87e3f136 4163 * @dev: drm dev pointer
87e3f136 4164 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4165 *
4166 * Puts the hw in the suspend state (all asics).
4167 * Returns 0 for success or an error on failure.
4168 * Called at driver suspend.
4169 */
de185019 4170int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4171{
a2e15b0e 4172 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4173 int r = 0;
d38ceaf9 4174
d38ceaf9
AD
4175 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4176 return 0;
4177
44779b43 4178 adev->in_suspend = true;
3fa8f89d 4179
47ea2076
SF
4180 /* Evict the majority of BOs before grabbing the full access */
4181 r = amdgpu_device_evict_resources(adev);
4182 if (r)
4183 return r;
4184
d7274ec7
BZ
4185 if (amdgpu_sriov_vf(adev)) {
4186 amdgpu_virt_fini_data_exchange(adev);
4187 r = amdgpu_virt_request_full_gpu(adev, false);
4188 if (r)
4189 return r;
4190 }
4191
3fa8f89d
S
4192 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4193 DRM_WARN("smart shift update failed\n");
4194
5f818173 4195 if (fbcon)
087451f3 4196 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4197
beff74bc 4198 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 4199
5e6932fe 4200 amdgpu_ras_suspend(adev);
4201
2196927b 4202 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4203
c004d44e 4204 if (!adev->in_s0ix)
5d3a2d95 4205 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4206
7863c155
ML
4207 r = amdgpu_device_evict_resources(adev);
4208 if (r)
4209 return r;
d38ceaf9 4210
8d35a259 4211 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4212
2196927b 4213 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4214
d7274ec7
BZ
4215 if (amdgpu_sriov_vf(adev))
4216 amdgpu_virt_release_full_gpu(adev, false);
4217
d38ceaf9
AD
4218 return 0;
4219}
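amdgpu_device_suspend() is normally reached from the driver's dev_pm_ops callbacks; a minimal, hypothetical caller sketch (example_pm_suspend is not a real symbol, only an illustration):

	static int example_pm_suspend(struct device *dev)	/* hypothetical */
	{
		struct drm_device *drm_dev = dev_get_drvdata(dev);

		/* fbcon = true: also tell the fbdev helper the device is suspending */
		return amdgpu_device_suspend(drm_dev, true);
	}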
4220
4221/**
810ddc3a 4222 * amdgpu_device_resume - initiate device resume
d38ceaf9 4223 *
87e3f136 4224 * @dev: drm dev pointer
87e3f136 4225 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4226 *
4227 * Bring the hw back to operating state (all asics).
4228 * Returns 0 for success or an error on failure.
4229 * Called at driver resume.
4230 */
de185019 4231int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4232{
1348969a 4233 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4234 int r = 0;
d38ceaf9 4235
d7274ec7
BZ
4236 if (amdgpu_sriov_vf(adev)) {
4237 r = amdgpu_virt_request_full_gpu(adev, true);
4238 if (r)
4239 return r;
4240 }
4241
d38ceaf9
AD
4242 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4243 return 0;
4244
62498733 4245 if (adev->in_s0ix)
bc143d8b 4246 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4247
d38ceaf9 4248 /* post card */
39c640c0 4249 if (amdgpu_device_need_post(adev)) {
4d2997ab 4250 r = amdgpu_device_asic_init(adev);
74b0b157 4251 if (r)
aac89168 4252 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4253 }
d38ceaf9 4254
06ec9070 4255 r = amdgpu_device_ip_resume(adev);
d7274ec7 4256
e6707218 4257 if (r) {
aac89168 4258 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4259 goto exit;
e6707218 4260 }
8d35a259 4261 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4262
06ec9070 4263 r = amdgpu_device_ip_late_init(adev);
03161a6e 4264 if (r)
3c22c1ea 4265 goto exit;
d38ceaf9 4266
beff74bc
AD
4267 queue_delayed_work(system_wq, &adev->delayed_init_work,
4268 msecs_to_jiffies(AMDGPU_RESUME_MS));
4269
c004d44e 4270 if (!adev->in_s0ix) {
5d3a2d95
AD
4271 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4272 if (r)
3c22c1ea 4273 goto exit;
5d3a2d95 4274 }
756e6880 4275
3c22c1ea
SF
4276exit:
4277 if (amdgpu_sriov_vf(adev)) {
4278 amdgpu_virt_init_data_exchange(adev);
4279 amdgpu_virt_release_full_gpu(adev, true);
4280 }
4281
4282 if (r)
4283 return r;
4284
96a5d8d4 4285 /* Make sure IB tests flushed */
beff74bc 4286 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4287
a2e15b0e 4288 if (fbcon)
087451f3 4289 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4290
5e6932fe 4291 amdgpu_ras_resume(adev);
4292
d09ef243
AD
4293 if (adev->mode_info.num_crtc) {
4294 /*
4295 * Most of the connector probing functions try to acquire runtime pm
4296 * refs to ensure that the GPU is powered on when connector polling is
4297 * performed. Since we're calling this from a runtime PM callback,
4298 * trying to acquire rpm refs will cause us to deadlock.
4299 *
4300 * Since we're guaranteed to be holding the rpm lock, it's safe to
4301 * temporarily disable the rpm helpers so this doesn't deadlock us.
4302 */
23a1a9e5 4303#ifdef CONFIG_PM
d09ef243 4304 dev->dev->power.disable_depth++;
23a1a9e5 4305#endif
d09ef243
AD
4306 if (!adev->dc_enabled)
4307 drm_helper_hpd_irq_event(dev);
4308 else
4309 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4310#ifdef CONFIG_PM
d09ef243 4311 dev->dev->power.disable_depth--;
23a1a9e5 4312#endif
d09ef243 4313 }
44779b43
RZ
4314 adev->in_suspend = false;
4315
dc907c9d
JX
4316 if (adev->enable_mes)
4317 amdgpu_mes_self_test(adev);
4318
3fa8f89d
S
4319 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4320 DRM_WARN("smart shift update failed\n");
4321
4d3b9ae5 4322 return 0;
d38ceaf9
AD
4323}
4324
e3ecdffa
AD
4325/**
4326 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4327 *
4328 * @adev: amdgpu_device pointer
4329 *
4330 * The list of all the hardware IPs that make up the asic is walked and
4331 * the check_soft_reset callbacks are run. check_soft_reset determines
4332 * if the asic is still hung or not.
4333 * Returns true if any of the IPs are still in a hung state, false if not.
4334 */
06ec9070 4335static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4336{
4337 int i;
4338 bool asic_hang = false;
4339
f993d628
ML
4340 if (amdgpu_sriov_vf(adev))
4341 return true;
4342
8bc04c29
AD
4343 if (amdgpu_asic_need_full_reset(adev))
4344 return true;
4345
63fbf42f 4346 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4347 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4348 continue;
a1255107
AD
4349 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4350 adev->ip_blocks[i].status.hang =
4351 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4352 if (adev->ip_blocks[i].status.hang) {
aac89168 4353 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4354 asic_hang = true;
4355 }
4356 }
4357 return asic_hang;
4358}
4359
e3ecdffa
AD
4360/**
4361 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4362 *
4363 * @adev: amdgpu_device pointer
4364 *
4365 * The list of all the hardware IPs that make up the asic is walked and the
4366 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4367 * handles any IP specific hardware or software state changes that are
4368 * necessary for a soft reset to succeed.
4369 * Returns 0 on success, negative error code on failure.
4370 */
06ec9070 4371static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4372{
4373 int i, r = 0;
4374
4375 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4376 if (!adev->ip_blocks[i].status.valid)
d31a501e 4377 continue;
a1255107
AD
4378 if (adev->ip_blocks[i].status.hang &&
4379 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4380 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4381 if (r)
4382 return r;
4383 }
4384 }
4385
4386 return 0;
4387}
4388
e3ecdffa
AD
4389/**
4390 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4391 *
4392 * @adev: amdgpu_device pointer
4393 *
4394 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4395 * reset is necessary to recover.
4396 * Returns true if a full asic reset is required, false if not.
4397 */
06ec9070 4398static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4399{
da146d3b
AD
4400 int i;
4401
8bc04c29
AD
4402 if (amdgpu_asic_need_full_reset(adev))
4403 return true;
4404
da146d3b 4405 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4406 if (!adev->ip_blocks[i].status.valid)
da146d3b 4407 continue;
a1255107
AD
4408 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4409 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4410 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4411 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4412 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4413 if (adev->ip_blocks[i].status.hang) {
aac89168 4414 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4415 return true;
4416 }
4417 }
35d782fe
CZ
4418 }
4419 return false;
4420}
4421
e3ecdffa
AD
4422/**
4423 * amdgpu_device_ip_soft_reset - do a soft reset
4424 *
4425 * @adev: amdgpu_device pointer
4426 *
4427 * The list of all the hardware IPs that make up the asic is walked and the
4428 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4429 * IP specific hardware or software state changes that are necessary to soft
4430 * reset the IP.
4431 * Returns 0 on success, negative error code on failure.
4432 */
06ec9070 4433static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4434{
4435 int i, r = 0;
4436
4437 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4438 if (!adev->ip_blocks[i].status.valid)
35d782fe 4439 continue;
a1255107
AD
4440 if (adev->ip_blocks[i].status.hang &&
4441 adev->ip_blocks[i].version->funcs->soft_reset) {
4442 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4443 if (r)
4444 return r;
4445 }
4446 }
4447
4448 return 0;
4449}
4450
e3ecdffa
AD
4451/**
4452 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4453 *
4454 * @adev: amdgpu_device pointer
4455 *
4456 * The list of all the hardware IPs that make up the asic is walked and the
4457 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4458 * handles any IP specific hardware or software state changes that are
4459 * necessary after the IP has been soft reset.
4460 * Returns 0 on success, negative error code on failure.
4461 */
06ec9070 4462static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4463{
4464 int i, r = 0;
4465
4466 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4467 if (!adev->ip_blocks[i].status.valid)
35d782fe 4468 continue;
a1255107
AD
4469 if (adev->ip_blocks[i].status.hang &&
4470 adev->ip_blocks[i].version->funcs->post_soft_reset)
4471 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4472 if (r)
4473 return r;
4474 }
4475
4476 return 0;
4477}
4478
e3ecdffa 4479/**
c33adbc7 4480 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4481 *
4482 * @adev: amdgpu_device pointer
4483 *
4484 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4485 * restore things like GPUVM page tables after a GPU reset where
4486 * the contents of VRAM might be lost.
403009bf
CK
4487 *
4488 * Returns:
4489 * 0 on success, negative error code on failure.
e3ecdffa 4490 */
c33adbc7 4491static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4492{
c41d1cf6 4493 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4494 struct amdgpu_bo *shadow;
e18aaea7 4495 struct amdgpu_bo_vm *vmbo;
403009bf 4496 long r = 1, tmo;
c41d1cf6
ML
4497
4498 if (amdgpu_sriov_runtime(adev))
b045d3af 4499 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4500 else
4501 tmo = msecs_to_jiffies(100);
4502
aac89168 4503 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4504 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4505 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4506 /* If vm is compute context or adev is APU, shadow will be NULL */
4507 if (!vmbo->shadow)
4508 continue;
4509 shadow = vmbo->shadow;
4510
403009bf 4511 /* No need to recover an evicted BO */
d3116756
CK
4512 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4513 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4514 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4515 continue;
4516
4517 r = amdgpu_bo_restore_shadow(shadow, &next);
4518 if (r)
4519 break;
4520
c41d1cf6 4521 if (fence) {
1712fb1a 4522 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4523 dma_fence_put(fence);
4524 fence = next;
1712fb1a 4525 if (tmo == 0) {
4526 r = -ETIMEDOUT;
c41d1cf6 4527 break;
1712fb1a 4528 } else if (tmo < 0) {
4529 r = tmo;
4530 break;
4531 }
403009bf
CK
4532 } else {
4533 fence = next;
c41d1cf6 4534 }
c41d1cf6
ML
4535 }
4536 mutex_unlock(&adev->shadow_list_lock);
4537
403009bf
CK
4538 if (fence)
4539 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4540 dma_fence_put(fence);
4541
1712fb1a 4542 if (r < 0 || tmo <= 0) {
aac89168 4543 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4544 return -EIO;
4545 }
c41d1cf6 4546
aac89168 4547 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4548 return 0;
c41d1cf6
ML
4549}
4550
a90ad3c2 4551
e3ecdffa 4552/**
06ec9070 4553 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4554 *
982a820b 4555 * @adev: amdgpu_device pointer
87e3f136 4556 * @from_hypervisor: request from hypervisor
5740682e
ML
4557 *
 4558 * do VF FLR and reinitialize the ASIC
3f48c681 4559 * return 0 on success, otherwise an error code
e3ecdffa
AD
4560 */
4561static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4562 bool from_hypervisor)
5740682e
ML
4563{
4564 int r;
a5f67c93 4565 struct amdgpu_hive_info *hive = NULL;
7258fa31 4566 int retry_limit = 0;
5740682e 4567
7258fa31 4568retry:
c004d44e 4569 amdgpu_amdkfd_pre_reset(adev);
428890a3 4570
5740682e
ML
4571 if (from_hypervisor)
4572 r = amdgpu_virt_request_full_gpu(adev, true);
4573 else
4574 r = amdgpu_virt_reset_gpu(adev);
4575 if (r)
4576 return r;
a90ad3c2 4577
83f24a8f
HC
4578 /* some sw clean up VF needs to do before recover */
4579 amdgpu_virt_post_reset(adev);
4580
a90ad3c2 4581 /* Resume IP prior to SMC */
06ec9070 4582 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4583 if (r)
4584 goto error;
a90ad3c2 4585
c9ffa427 4586 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4587
7a3e0bb2
RZ
4588 r = amdgpu_device_fw_loading(adev);
4589 if (r)
4590 return r;
4591
a90ad3c2 4592 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4593 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4594 if (r)
4595 goto error;
a90ad3c2 4596
a5f67c93
ZL
4597 hive = amdgpu_get_xgmi_hive(adev);
4598 /* Update PSP FW topology after reset */
4599 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4600 r = amdgpu_xgmi_update_topology(hive, adev);
4601
4602 if (hive)
4603 amdgpu_put_xgmi_hive(hive);
4604
4605 if (!r) {
4606 amdgpu_irq_gpu_reset_resume_helper(adev);
4607 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4608
c004d44e 4609 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4610 }
a90ad3c2 4611
abc34253 4612error:
c41d1cf6 4613 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4614 amdgpu_inc_vram_lost(adev);
c33adbc7 4615 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4616 }
437f3e0b 4617 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4618
7258fa31
SK
4619 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4620 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4621 retry_limit++;
4622 goto retry;
4623 } else
4624 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4625 }
4626
a90ad3c2
ML
4627 return r;
4628}
4629
9a1cddd6 4630/**
 4631 * amdgpu_device_has_job_running - check if there is any job in the pending list
4632 *
982a820b 4633 * @adev: amdgpu_device pointer
9a1cddd6 4634 *
 4635 * check if there is any job in the pending list
4636 */
4637bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4638{
4639 int i;
4640 struct drm_sched_job *job;
4641
4642 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4643 struct amdgpu_ring *ring = adev->rings[i];
4644
4645 if (!ring || !ring->sched.thread)
4646 continue;
4647
4648 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4649 job = list_first_entry_or_null(&ring->sched.pending_list,
4650 struct drm_sched_job, list);
9a1cddd6 4651 spin_unlock(&ring->sched.job_list_lock);
4652 if (job)
4653 return true;
4654 }
4655 return false;
4656}
4657
12938fad
CK
4658/**
4659 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4660 *
982a820b 4661 * @adev: amdgpu_device pointer
12938fad
CK
4662 *
4663 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4664 * a hung GPU.
4665 */
4666bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4667{
12938fad 4668
3ba7b418
AG
4669 if (amdgpu_gpu_recovery == 0)
4670 goto disabled;
4671
1a11a65d
YC
4672 /* Skip soft reset check in fatal error mode */
4673 if (!amdgpu_ras_is_poison_mode_supported(adev))
4674 return true;
4675
3ba7b418
AG
4676 if (amdgpu_sriov_vf(adev))
4677 return true;
4678
4679 if (amdgpu_gpu_recovery == -1) {
4680 switch (adev->asic_type) {
b3523c45
AD
4681#ifdef CONFIG_DRM_AMDGPU_SI
4682 case CHIP_VERDE:
4683 case CHIP_TAHITI:
4684 case CHIP_PITCAIRN:
4685 case CHIP_OLAND:
4686 case CHIP_HAINAN:
4687#endif
4688#ifdef CONFIG_DRM_AMDGPU_CIK
4689 case CHIP_KAVERI:
4690 case CHIP_KABINI:
4691 case CHIP_MULLINS:
4692#endif
4693 case CHIP_CARRIZO:
4694 case CHIP_STONEY:
4695 case CHIP_CYAN_SKILLFISH:
3ba7b418 4696 goto disabled;
b3523c45
AD
4697 default:
4698 break;
3ba7b418 4699 }
12938fad
CK
4700 }
4701
4702 return true;
3ba7b418
AG
4703
4704disabled:
aac89168 4705 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4706 return false;
12938fad
CK
4707}
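The amdgpu_gpu_recovery checks above follow the usual tri-state module-parameter convention (stated here as an assumption, since only the 0 and -1 cases are visible in this function): -1 lets the driver decide per ASIC, 0 disables recovery, 1 forces it on, e.g.:

	modprobe amdgpu gpu_recovery=1    # attempt reset/recovery even on ASICs the auto setting would skip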
4708
5c03e584
FX
4709int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4710{
47fc644f
SS
4711 u32 i;
4712 int ret = 0;
5c03e584 4713
47fc644f 4714 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4715
47fc644f 4716 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4717
47fc644f
SS
4718 /* disable BM */
4719 pci_clear_master(adev->pdev);
5c03e584 4720
47fc644f 4721 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4722
47fc644f
SS
4723 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4724 dev_info(adev->dev, "GPU smu mode1 reset\n");
4725 ret = amdgpu_dpm_mode1_reset(adev);
4726 } else {
4727 dev_info(adev->dev, "GPU psp mode1 reset\n");
4728 ret = psp_gpu_reset(adev);
4729 }
5c03e584 4730
47fc644f
SS
4731 if (ret)
4732 dev_err(adev->dev, "GPU mode1 reset failed\n");
5c03e584 4733
47fc644f 4734 amdgpu_device_load_pci_state(adev->pdev);
5c03e584 4735
47fc644f
SS
4736 /* wait for asic to come out of reset */
4737 for (i = 0; i < adev->usec_timeout; i++) {
4738 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4739
47fc644f
SS
4740 if (memsize != 0xffffffff)
4741 break;
4742 udelay(1);
4743 }
5c03e584 4744
47fc644f
SS
4745 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4746 return ret;
5c03e584 4747}
5c6dd71e 4748
e3c1b071 4749int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4750 struct amdgpu_reset_context *reset_context)
26bc5340 4751{
5c1e6fa4 4752 int i, r = 0;
04442bf7
LL
4753 struct amdgpu_job *job = NULL;
4754 bool need_full_reset =
4755 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4756
4757 if (reset_context->reset_req_dev == adev)
4758 job = reset_context->job;
71182665 4759
b602ca5f
TZ
4760 if (amdgpu_sriov_vf(adev)) {
4761 /* stop the data exchange thread */
4762 amdgpu_virt_fini_data_exchange(adev);
4763 }
4764
9e225fb9
AG
4765 amdgpu_fence_driver_isr_toggle(adev, true);
4766
71182665 4767 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4768 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4769 struct amdgpu_ring *ring = adev->rings[i];
4770
51687759 4771 if (!ring || !ring->sched.thread)
0875dc9e 4772 continue;
5740682e 4773
c530b02f
JZ
 4774 /* clear job fences from fence drv to avoid force_completion;
 4775 * leave NULL and vm flush fences in fence drv */
5c1e6fa4 4776 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4777
2f9d4084
ML
4778 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4779 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4780 }
d38ceaf9 4781
9e225fb9
AG
4782 amdgpu_fence_driver_isr_toggle(adev, false);
4783
ff99849b 4784 if (job && job->vm)
222b5f04
AG
4785 drm_sched_increase_karma(&job->base);
4786
04442bf7 4787 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b
LL
4788 /* If reset handler not implemented, continue; otherwise return */
4789 if (r == -ENOSYS)
4790 r = 0;
4791 else
04442bf7
LL
4792 return r;
4793
1d721ed6 4794 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4795 if (!amdgpu_sriov_vf(adev)) {
4796
4797 if (!need_full_reset)
4798 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4799
360cd081
LG
4800 if (!need_full_reset && amdgpu_gpu_recovery &&
4801 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4802 amdgpu_device_ip_pre_soft_reset(adev);
4803 r = amdgpu_device_ip_soft_reset(adev);
4804 amdgpu_device_ip_post_soft_reset(adev);
4805 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4806 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4807 need_full_reset = true;
4808 }
4809 }
4810
4811 if (need_full_reset)
4812 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4813 if (need_full_reset)
4814 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4815 else
4816 clear_bit(AMDGPU_NEED_FULL_RESET,
4817 &reset_context->flags);
26bc5340
AG
4818 }
4819
4820 return r;
4821}
4822
15fd09a0
SA
4823static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4824{
15fd09a0
SA
4825 int i;
4826
38a15ad9 4827 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4828
4829 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4830 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4831 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4832 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4833 }
4834
4835 return 0;
4836}
4837
3d8785f6
SA
4838#ifdef CONFIG_DEV_COREDUMP
4839static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4840 size_t count, void *data, size_t datalen)
4841{
4842 struct drm_printer p;
4843 struct amdgpu_device *adev = data;
4844 struct drm_print_iterator iter;
4845 int i;
4846
4847 iter.data = buffer;
4848 iter.offset = 0;
4849 iter.start = offset;
4850 iter.remain = count;
4851
4852 p = drm_coredump_printer(&iter);
4853
4854 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4855 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4856 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4857 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4858 if (adev->reset_task_info.pid)
4859 drm_printf(&p, "process_name: %s PID: %d\n",
4860 adev->reset_task_info.process_name,
4861 adev->reset_task_info.pid);
4862
4863 if (adev->reset_vram_lost)
4864 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4865 if (adev->num_regs) {
4866 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4867
4868 for (i = 0; i < adev->num_regs; i++)
4869 drm_printf(&p, "0x%08x: 0x%08x\n",
4870 adev->reset_dump_reg_list[i],
4871 adev->reset_dump_reg_value[i]);
4872 }
4873
4874 return count - iter.remain;
4875}
4876
4877static void amdgpu_devcoredump_free(void *data)
4878{
4879}
4880
4881static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4882{
4883 struct drm_device *dev = adev_to_drm(adev);
4884
4885 ktime_get_ts64(&adev->reset_time);
4886 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4887 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4888}
4889#endif
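Once dev_coredumpm() has registered a dump, it is exposed through the generic devcoredump class device rather than anything amdgpu-specific; a userspace usage sketch:

	# read the captured GPU-reset dump
	cat /sys/class/devcoredump/devcd*/data > amdgpu-reset.dump
	# writing anything to the node discards the dump before its timeout
	echo 1 > /sys/class/devcoredump/devcd1/data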
4890
04442bf7
LL
4891int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4892 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4893{
4894 struct amdgpu_device *tmp_adev = NULL;
04442bf7 4895 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 4896 int r = 0;
f5c7e779 4897 bool gpu_reset_for_dev_remove = 0;
26bc5340 4898
04442bf7
LL
4899 /* Try reset handler method first */
4900 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4901 reset_list);
15fd09a0 4902 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
4903
4904 reset_context->reset_device_list = device_list_handle;
04442bf7 4905 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b
LL
4906 /* If reset handler not implemented, continue; otherwise return */
4907 if (r == -ENOSYS)
4908 r = 0;
4909 else
04442bf7
LL
4910 return r;
4911
4912 /* Reset handler not implemented, use the default method */
4913 need_full_reset =
4914 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4915 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4916
f5c7e779
YC
4917 gpu_reset_for_dev_remove =
4918 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4919 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4920
26bc5340 4921 /*
655ce9cb 4922 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
4923 * to allow proper links negotiation in FW (within 1 sec)
4924 */
7ac71382 4925 if (!skip_hw_reset && need_full_reset) {
655ce9cb 4926 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 4927 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4928 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 4929 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 4930 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4931 r = -EALREADY;
4932 } else
4933 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4934
041a62bc 4935 if (r) {
aac89168 4936 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4937 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4938 break;
ce316fa5
LM
4939 }
4940 }
4941
041a62bc
AG
4942 /* For XGMI wait for all resets to complete before proceed */
4943 if (!r) {
655ce9cb 4944 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
4945 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4946 flush_work(&tmp_adev->xgmi_reset_work);
4947 r = tmp_adev->asic_reset_res;
4948 if (r)
4949 break;
ce316fa5
LM
4950 }
4951 }
4952 }
ce316fa5 4953 }
26bc5340 4954
43c4d576 4955 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 4956 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 4957 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4958 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4959 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
4960 }
4961
00eaa571 4962 amdgpu_ras_intr_cleared();
43c4d576 4963 }
00eaa571 4964
f5c7e779
YC
4965 /* Since the mode1 reset affects base ip blocks, the
4966 * phase1 ip blocks need to be resumed. Otherwise there
4967 * will be a BIOS signature error and the psp bootloader
4968 * can't load kdb on the next amdgpu install.
4969 */
4970 if (gpu_reset_for_dev_remove) {
4971 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4972 amdgpu_device_ip_resume_phase1(tmp_adev);
4973
4974 goto end;
4975 }
4976
655ce9cb 4977 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
4978 if (need_full_reset) {
4979 /* post card */
e3c1b071 4980 r = amdgpu_device_asic_init(tmp_adev);
4981 if (r) {
aac89168 4982 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 4983 } else {
26bc5340 4984 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1
JZ
4985 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4986 if (r)
4987 goto out;
4988
26bc5340
AG
4989 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4990 if (r)
4991 goto out;
4992
4993 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
4994#ifdef CONFIG_DEV_COREDUMP
4995 tmp_adev->reset_vram_lost = vram_lost;
4996 memset(&tmp_adev->reset_task_info, 0,
4997 sizeof(tmp_adev->reset_task_info));
4998 if (reset_context->job && reset_context->job->vm)
4999 tmp_adev->reset_task_info =
5000 reset_context->job->vm->task_info;
5001 amdgpu_reset_capture_coredumpm(tmp_adev);
5002#endif
26bc5340 5003 if (vram_lost) {
77e7f829 5004 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5005 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5006 }
5007
26bc5340
AG
5008 r = amdgpu_device_fw_loading(tmp_adev);
5009 if (r)
5010 return r;
5011
5012 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5013 if (r)
5014 goto out;
5015
5016 if (vram_lost)
5017 amdgpu_device_fill_reset_magic(tmp_adev);
5018
fdafb359
EQ
5019 /*
 5020 * Add this ASIC back as tracked since the reset already
 5021 * completed successfully.
5022 */
5023 amdgpu_register_gpu_instance(tmp_adev);
5024
04442bf7
LL
5025 if (!reset_context->hive &&
5026 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5027 amdgpu_xgmi_add_device(tmp_adev);
5028
7c04ca50 5029 r = amdgpu_device_ip_late_init(tmp_adev);
5030 if (r)
5031 goto out;
5032
087451f3 5033 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5034
e8fbaf03
GC
5035 /*
 5036 * The GPU enters a bad state once the number of faulty
 5037 * pages caused by ECC errors reaches the threshold, and
 5038 * ras recovery is scheduled next. So add a check here
 5039 * to break recovery if the bad page threshold has indeed
 5040 * been exceeded, and remind the user to retire this GPU
 5041 * or to set a bigger bad_page_threshold value the next
 5042 * time the driver is probed.
 5043 */
11003c68 5045 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5046 /* must succeed. */
5047 amdgpu_ras_resume(tmp_adev);
5048 } else {
5049 r = -EINVAL;
5050 goto out;
5051 }
e79a04d5 5052
26bc5340 5053 /* Update PSP FW topology after reset */
04442bf7
LL
5054 if (reset_context->hive &&
5055 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5056 r = amdgpu_xgmi_update_topology(
5057 reset_context->hive, tmp_adev);
26bc5340
AG
5058 }
5059 }
5060
26bc5340
AG
5061out:
5062 if (!r) {
5063 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5064 r = amdgpu_ib_ring_tests(tmp_adev);
5065 if (r) {
5066 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5067 need_full_reset = true;
5068 r = -EAGAIN;
5069 goto end;
5070 }
5071 }
5072
5073 if (!r)
5074 r = amdgpu_device_recover_vram(tmp_adev);
5075 else
5076 tmp_adev->asic_reset_res = r;
5077 }
5078
5079end:
04442bf7
LL
5080 if (need_full_reset)
5081 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5082 else
5083 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5084 return r;
5085}
5086
e923be99 5087static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5088{
5740682e 5089
a3a09142
AD
5090 switch (amdgpu_asic_reset_method(adev)) {
5091 case AMD_RESET_METHOD_MODE1:
5092 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5093 break;
5094 case AMD_RESET_METHOD_MODE2:
5095 adev->mp1_state = PP_MP1_STATE_RESET;
5096 break;
5097 default:
5098 adev->mp1_state = PP_MP1_STATE_NONE;
5099 break;
5100 }
26bc5340 5101}
d38ceaf9 5102
e923be99 5103static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5104{
89041940 5105 amdgpu_vf_error_trans_all(adev);
a3a09142 5106 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5107}
5108
3f12acc8
EQ
5109static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5110{
5111 struct pci_dev *p = NULL;
5112
5113 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5114 adev->pdev->bus->number, 1);
5115 if (p) {
5116 pm_runtime_enable(&(p->dev));
5117 pm_runtime_resume(&(p->dev));
5118 }
b85e285e
YY
5119
5120 pci_dev_put(p);
3f12acc8
EQ
5121}
5122
5123static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5124{
5125 enum amd_reset_method reset_method;
5126 struct pci_dev *p = NULL;
5127 u64 expires;
5128
5129 /*
 5130 * For now, only BACO and mode1 reset are confirmed to
 5131 * suffer the audio issue if audio is not properly suspended.
5132 */
5133 reset_method = amdgpu_asic_reset_method(adev);
5134 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5135 (reset_method != AMD_RESET_METHOD_MODE1))
5136 return -EINVAL;
5137
5138 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5139 adev->pdev->bus->number, 1);
5140 if (!p)
5141 return -ENODEV;
5142
5143 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5144 if (!expires)
5145 /*
 5146 * If we cannot get the audio device autosuspend delay,
 5147 * a fixed 4s interval is used. Since the audio controller's
 5148 * default autosuspend delay is 3s, the 4s used here is
 5149 * guaranteed to cover it.
5150 */
54b7feb9 5151 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5152
5153 while (!pm_runtime_status_suspended(&(p->dev))) {
5154 if (!pm_runtime_suspend(&(p->dev)))
5155 break;
5156
5157 if (expires < ktime_get_mono_fast_ns()) {
5158 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5159 pci_dev_put(p);
3f12acc8
EQ
5160 /* TODO: abort the succeeding gpu reset? */
5161 return -ETIMEDOUT;
5162 }
5163 }
5164
5165 pm_runtime_disable(&(p->dev));
5166
b85e285e 5167 pci_dev_put(p);
3f12acc8
EQ
5168 return 0;
5169}
5170
d193b12b 5171static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5172{
5173 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5174
5175#if defined(CONFIG_DEBUG_FS)
5176 if (!amdgpu_sriov_vf(adev))
5177 cancel_work(&adev->reset_work);
5178#endif
5179
5180 if (adev->kfd.dev)
5181 cancel_work(&adev->kfd.reset_work);
5182
5183 if (amdgpu_sriov_vf(adev))
5184 cancel_work(&adev->virt.flr_work);
5185
5186 if (con && adev->ras_enabled)
5187 cancel_work(&con->recovery_work);
5188
5189}
5190
26bc5340 5191/**
6e9c65f7 5192 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5193 *
982a820b 5194 * @adev: amdgpu_device pointer
26bc5340 5195 * @job: which job triggered the hang
80bd2de1 5196 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5197 *
5198 * Attempt to reset the GPU if it has hung (all asics).
 5199 * Attempt to do soft-reset or full-reset and reinitialize the ASIC.
5200 * Returns 0 for success or an error on failure.
5201 */
5202
cf727044 5203int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5204 struct amdgpu_job *job,
5205 struct amdgpu_reset_context *reset_context)
26bc5340 5206{
1d721ed6 5207 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5208 bool job_signaled = false;
26bc5340 5209 struct amdgpu_hive_info *hive = NULL;
26bc5340 5210 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5211 int i, r = 0;
bb5c7235 5212 bool need_emergency_restart = false;
3f12acc8 5213 bool audio_suspended = false;
f5c7e779
YC
5214 bool gpu_reset_for_dev_remove = false;
5215
5216 gpu_reset_for_dev_remove =
5217 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5218 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5219
6e3cd2a9 5220 /*
bb5c7235
WS
5221 * Special case: RAS triggered and full reset isn't supported
5222 */
5223 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5224
d5ea093e
AG
5225 /*
5226 * Flush RAM to disk so that after reboot
 5227 * the user can read the log and see why the system rebooted.
5228 */
bb5c7235 5229 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5230 DRM_WARN("Emergency reboot.");
5231
5232 ksys_sync_helper();
5233 emergency_restart();
5234 }
5235
b823821f 5236 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5237 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5238
175ac6ec
ZL
5239 if (!amdgpu_sriov_vf(adev))
5240 hive = amdgpu_get_xgmi_hive(adev);
681260df 5241 if (hive)
53b3f8f4 5242 mutex_lock(&hive->hive_lock);
26bc5340 5243
f1549c09
LG
5244 reset_context->job = job;
5245 reset_context->hive = hive;
9e94d22c
EQ
5246 /*
5247 * Build list of devices to reset.
 5248 * In case we are in XGMI hive mode, re-sort the device list
5249 * to put adev in the 1st position.
5250 */
5251 INIT_LIST_HEAD(&device_list);
175ac6ec 5252 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5253 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5254 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5255 if (gpu_reset_for_dev_remove && adev->shutdown)
5256 tmp_adev->shutdown = true;
5257 }
655ce9cb 5258 if (!list_is_first(&adev->reset_list, &device_list))
5259 list_rotate_to_front(&adev->reset_list, &device_list);
5260 device_list_handle = &device_list;
26bc5340 5261 } else {
655ce9cb 5262 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5263 device_list_handle = &device_list;
5264 }
5265
e923be99
AG
5266 /* We need to lock reset domain only once both for XGMI and single device */
5267 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5268 reset_list);
3675c2f2 5269 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5270
1d721ed6 5271 /* block all schedulers and reset given job's ring */
655ce9cb 5272 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5273
e923be99 5274 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5275
3f12acc8
EQ
5276 /*
 5277 * Try to put the audio codec into suspend state
 5278 * before the gpu reset starts.
 5279 *
 5280 * Because the power domain of the graphics device
 5281 * is shared with the AZ power domain, we may otherwise
 5282 * change the audio hardware from behind the audio
 5283 * driver's back and trigger audio codec errors.
5285 */
5286 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5287 audio_suspended = true;
5288
9e94d22c
EQ
5289 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5290
52fb44cf
EQ
5291 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5292
c004d44e 5293 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5294 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5295
12ffa55d
AG
5296 /*
 5297 * Mark the ASICs to be reset as untracked first,
 5298 * and add them back after the reset completes.
5299 */
5300 amdgpu_unregister_gpu_instance(tmp_adev);
5301
163d4cd2 5302 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5303
f1c1314b 5304 /* disable ras on ALL IPs */
bb5c7235 5305 if (!need_emergency_restart &&
b823821f 5306 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5307 amdgpu_ras_suspend(tmp_adev);
5308
1d721ed6
AG
5309 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5310 struct amdgpu_ring *ring = tmp_adev->rings[i];
5311
5312 if (!ring || !ring->sched.thread)
5313 continue;
5314
0b2d2c2e 5315 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5316
bb5c7235 5317 if (need_emergency_restart)
7c6e68c7 5318 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5319 }
8f8c80f4 5320 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5321 }
5322
bb5c7235 5323 if (need_emergency_restart)
7c6e68c7
AG
5324 goto skip_sched_resume;
5325
1d721ed6
AG
5326 /*
5327 * Must check guilty signal here since after this point all old
5328 * HW fences are force signaled.
5329 *
5330 * job->base holds a reference to parent fence
5331 */
f6a3f660 5332 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5333 job_signaled = true;
1d721ed6
AG
5334 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5335 goto skip_hw_reset;
5336 }
5337
26bc5340 5338retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5339 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5340 if (gpu_reset_for_dev_remove) {
 5341 /* Workaround for ASICs that need to disable SMC first */
5342 amdgpu_device_smu_fini_early(tmp_adev);
5343 }
f1549c09 5344 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
 5345 /* TODO: Should we stop? */
5346 if (r) {
aac89168 5347 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5348 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5349 tmp_adev->asic_reset_res = r;
5350 }
247c7b0d
AG
5351
5352 /*
 5353 * Drop all pending non-scheduler resets. Scheduler resets
5354 * were already dropped during drm_sched_stop
5355 */
d193b12b 5356 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5357 }
5358
5359 /* Actual ASIC resets if needed.*/
4f30d920 5360 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5361 if (amdgpu_sriov_vf(adev)) {
5362 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5363 if (r)
5364 adev->asic_reset_res = r;
950d6425 5365
28606c4e
YC
 5366 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so ras needs to be resumed during reset */
5367 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
5368 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
950d6425 5369 amdgpu_ras_resume(adev);
26bc5340 5370 } else {
f1549c09 5371 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5372 if (r && r == -EAGAIN)
26bc5340 5373 goto retry;
f5c7e779
YC
5374
5375 if (!r && gpu_reset_for_dev_remove)
5376 goto recover_end;
26bc5340
AG
5377 }
5378
1d721ed6
AG
5379skip_hw_reset:
5380
26bc5340 5381 /* Post ASIC reset for all devs .*/
655ce9cb 5382 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5383
1d721ed6
AG
5384 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5385 struct amdgpu_ring *ring = tmp_adev->rings[i];
5386
5387 if (!ring || !ring->sched.thread)
5388 continue;
5389
6868a2c4 5390 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5391 }
5392
693073a0 5393 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
ed67f729
JX
5394 amdgpu_mes_self_test(tmp_adev);
5395
1053b9c9 5396 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
4a580877 5397 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
5398 }
5399
7258fa31
SK
5400 if (tmp_adev->asic_reset_res)
5401 r = tmp_adev->asic_reset_res;
5402
1d721ed6 5403 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5404
5405 if (r) {
5406 /* bad news, how to tell it to userspace ? */
12ffa55d 5407 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5408 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5409 } else {
12ffa55d 5410 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5411 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5412 DRM_WARN("smart shift update failed\n");
26bc5340 5413 }
7c6e68c7 5414 }
26bc5340 5415
7c6e68c7 5416skip_sched_resume:
655ce9cb 5417 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5418 /* unlock kfd: SRIOV would do it separately */
c004d44e 5419 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5420 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5421
5422 /* kfd_post_reset will do nothing if kfd device is not initialized,
 5423 * need to bring up kfd here if it was not initialized before
5424 */
5425 if (!adev->kfd.init_complete)
5426 amdgpu_amdkfd_device_init(adev);
5427
3f12acc8
EQ
5428 if (audio_suspended)
5429 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5430
5431 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5432
5433 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5434 }
5435
f5c7e779 5436recover_end:
e923be99
AG
5437 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5438 reset_list);
5439 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5440
9e94d22c 5441 if (hive) {
9e94d22c 5442 mutex_unlock(&hive->hive_lock);
d95e8e97 5443 amdgpu_put_xgmi_hive(hive);
9e94d22c 5444 }
26bc5340 5445
f287a3c5 5446 if (r)
26bc5340 5447 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5448
5449 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5450 return r;
5451}
5452
e3ecdffa
AD
5453/**
 5454 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5455 *
5456 * @adev: amdgpu_device pointer
5457 *
 5458 * Fetches and stores in the driver the PCIE capabilities (gen speed
5459 * and lanes) of the slot the device is in. Handles APUs and
5460 * virtualized environments where PCIE config space may not be available.
5461 */
5494d864 5462static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5463{
5d9a6330 5464 struct pci_dev *pdev;
c5313457
HK
5465 enum pci_bus_speed speed_cap, platform_speed_cap;
5466 enum pcie_link_width platform_link_width;
d0dd7f0c 5467
cd474ba0
AD
5468 if (amdgpu_pcie_gen_cap)
5469 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5470
cd474ba0
AD
5471 if (amdgpu_pcie_lane_cap)
5472 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5473
cd474ba0 5474 /* covers APUs as well */
04e85958 5475 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5476 if (adev->pm.pcie_gen_mask == 0)
5477 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5478 if (adev->pm.pcie_mlw_mask == 0)
5479 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5480 return;
cd474ba0 5481 }
d0dd7f0c 5482
c5313457
HK
5483 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5484 return;
5485
dbaa922b
AD
5486 pcie_bandwidth_available(adev->pdev, NULL,
5487 &platform_speed_cap, &platform_link_width);
c5313457 5488
cd474ba0 5489 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5490 /* asic caps */
5491 pdev = adev->pdev;
5492 speed_cap = pcie_get_speed_cap(pdev);
5493 if (speed_cap == PCI_SPEED_UNKNOWN) {
5494 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5495 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5496 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5497 } else {
2b3a1f51
FX
5498 if (speed_cap == PCIE_SPEED_32_0GT)
5499 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5500 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5501 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5502 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5503 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5504 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5505 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5506 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5507 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5508 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5509 else if (speed_cap == PCIE_SPEED_8_0GT)
5510 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5511 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5512 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5513 else if (speed_cap == PCIE_SPEED_5_0GT)
5514 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5515 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5516 else
5517 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5518 }
5519 /* platform caps */
c5313457 5520 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5521 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5522 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5523 } else {
2b3a1f51
FX
5524 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5525 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5526 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5527 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5528 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5529 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5530 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5531 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5532 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5533 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5534 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5535 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5536 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5537 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5538 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5539 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5540 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5541 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5542 else
5543 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5544
cd474ba0
AD
5545 }
5546 }
5547 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5548 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5549 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5550 } else {
c5313457 5551 switch (platform_link_width) {
5d9a6330 5552 case PCIE_LNK_X32:
cd474ba0
AD
5553 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5554 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5555 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5556 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5557 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5558 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5559 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5560 break;
5d9a6330 5561 case PCIE_LNK_X16:
cd474ba0
AD
5562 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5563 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5564 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5565 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5566 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5567 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5568 break;
5d9a6330 5569 case PCIE_LNK_X12:
cd474ba0
AD
5570 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5571 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5572 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5573 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5574 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5575 break;
5d9a6330 5576 case PCIE_LNK_X8:
cd474ba0
AD
5577 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5578 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5579 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5580 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5581 break;
5d9a6330 5582 case PCIE_LNK_X4:
cd474ba0
AD
5583 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5584 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5585 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5586 break;
5d9a6330 5587 case PCIE_LNK_X2:
cd474ba0
AD
5588 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5589 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5590 break;
5d9a6330 5591 case PCIE_LNK_X1:
cd474ba0
AD
5592 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5593 break;
5594 default:
5595 break;
5596 }
d0dd7f0c
AD
5597 }
5598 }
5599}
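
/*
 * Illustrative sketch only: the masks built above mix the ASIC caps
 * (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_*) and the platform caps
 * (CAIL_PCIE_LINK_SPEED_SUPPORT_*) in adev->pm.pcie_gen_mask; a consumer
 * typically requires a given speed to be present in both before selecting
 * it. The helper below is hypothetical and not part of the driver.
 */
static bool amdgpu_example_gen3_supported(struct amdgpu_device *adev)
{
	return (adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
	       (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
}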
d38ceaf9 5600
08a2fd23
RE
5601/**
5602 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5603 *
5604 * @adev: amdgpu_device pointer
5605 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5606 *
5607 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5608 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5609 * @peer_adev.
5610 */
5611bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5612 struct amdgpu_device *peer_adev)
5613{
5614#ifdef CONFIG_HSA_AMD_P2P
5615 uint64_t address_mask = peer_adev->dev->dma_mask ?
5616 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5617 resource_size_t aper_limit =
5618 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5619 bool p2p_access =
5620 !adev->gmc.xgmi.connected_to_cpu &&
5621 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5622
5623 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5624 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5625 !(adev->gmc.aper_base & address_mask ||
5626 aper_limit & address_mask));
5627#else
5628 return false;
5629#endif
5630}
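
/*
 * Illustrative sketch only: how a caller might gate peer-to-peer DMA on the
 * check above. The helper name and error code are hypothetical; the real
 * users of this check live in the XGMI/KFD topology and dma-buf paths.
 */
static int amdgpu_example_map_peer_vram(struct amdgpu_device *adev,
					struct amdgpu_device *peer_adev)
{
	/* Both directions must be visible for a symmetric P2P mapping. */
	if (!amdgpu_device_is_peer_accessible(adev, peer_adev) ||
	    !amdgpu_device_is_peer_accessible(peer_adev, adev))
		return -EOPNOTSUPP;

	/* ... set up the DMA mapping over the PCIe BAR here ... */
	return 0;
}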
5631
361dbd01
AD
5632int amdgpu_device_baco_enter(struct drm_device *dev)
5633{
1348969a 5634 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5635 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5636
6ab68650 5637 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5638 return -ENOTSUPP;
5639
8ab0d6f0 5640 if (ras && adev->ras_enabled &&
acdae216 5641 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5642 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5643
9530273e 5644 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5645}
5646
5647int amdgpu_device_baco_exit(struct drm_device *dev)
5648{
1348969a 5649 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5650 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5651 int ret = 0;
361dbd01 5652
6ab68650 5653 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5654 return -ENOTSUPP;
5655
9530273e
EQ
5656 ret = amdgpu_dpm_baco_exit(adev);
5657 if (ret)
5658 return ret;
7a22677b 5659
8ab0d6f0 5660 if (ras && adev->ras_enabled &&
acdae216 5661 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5662 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5663
1bece222
CL
5664 if (amdgpu_passthrough(adev) &&
5665 adev->nbio.funcs->clear_doorbell_interrupt)
5666 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5667
7a22677b 5668 return 0;
361dbd01 5669}
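
/*
 * Illustrative sketch only: the enter/exit helpers above are meant to be
 * used as a pair, e.g. to cycle the ASIC through BACO. The wrapper below is
 * hypothetical and the dwell time is arbitrary.
 */
static int amdgpu_example_baco_cycle(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;

	/* Keep the device in BACO briefly before bringing it back up. */
	msleep(20);

	return amdgpu_device_baco_exit(dev);
}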
c9a6b82f
AG
5670
5671/**
5672 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5673 * @pdev: PCI device struct
5674 * @state: PCI channel state
5675 *
5676 * Description: Called when a PCI error is detected.
5677 *
5678 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5679 */
5680pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5681{
5682 struct drm_device *dev = pci_get_drvdata(pdev);
5683 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5684 int i;
c9a6b82f
AG
5685
5686 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5687
6894305c
AG
5688 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5689 DRM_WARN("No support for XGMI hive yet...");
5690 return PCI_ERS_RESULT_DISCONNECT;
5691 }
5692
e17e27f9
GC
5693 adev->pci_channel_state = state;
5694
c9a6b82f
AG
5695 switch (state) {
5696 case pci_channel_io_normal:
5697 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5698 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5699 case pci_channel_io_frozen:
5700 /*
d0fb18b5 5701 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5702 * to GPU during PCI error recovery
5703 */
3675c2f2 5704 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5705 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5706
5707 /*
5708 * Block any work scheduling as we do for regular GPU reset
5709 * for the duration of the recovery
5710 */
5711 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5712 struct amdgpu_ring *ring = adev->rings[i];
5713
5714 if (!ring || !ring->sched.thread)
5715 continue;
5716
5717 drm_sched_stop(&ring->sched, NULL);
5718 }
8f8c80f4 5719 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5720 return PCI_ERS_RESULT_NEED_RESET;
5721 case pci_channel_io_perm_failure:
5722 /* Permanent error, prepare for device removal */
5723 return PCI_ERS_RESULT_DISCONNECT;
5724 }
5725
5726 return PCI_ERS_RESULT_NEED_RESET;
5727}
5728
5729/**
5730 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5731 * @pdev: pointer to PCI device
5732 */
5733pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5734{
5735
5736 DRM_INFO("PCI error: mmio enabled callback!!\n");
5737
5738 /* TODO - dump whatever for debugging purposes */
5739
5740 /* This called only if amdgpu_pci_error_detected returns
5741 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5742 * works, no need to reset slot.
5743 */
5744
5745 return PCI_ERS_RESULT_RECOVERED;
5746}
5747
5748/**
5749 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5750 * @pdev: PCI device struct
5751 *
5752 * Description: This routine is called by the pci error recovery
5753 * code after the PCI slot has been reset, just before we
5754 * should resume normal operations.
5755 */
5756pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5757{
5758 struct drm_device *dev = pci_get_drvdata(pdev);
5759 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5760 int r, i;
04442bf7 5761 struct amdgpu_reset_context reset_context;
362c7b91 5762 u32 memsize;
7ac71382 5763 struct list_head device_list;
c9a6b82f
AG
5764
5765 DRM_INFO("PCI error: slot reset callback!!\n");
5766
04442bf7
LL
5767 memset(&reset_context, 0, sizeof(reset_context));
5768
7ac71382 5769 INIT_LIST_HEAD(&device_list);
655ce9cb 5770 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5771
362c7b91
AG
5772 /* wait for asic to come out of reset */
5773 msleep(500);
5774
7ac71382 5775 /* Restore PCI confspace */
c1dd4aa6 5776 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5777
362c7b91
AG
5778 /* confirm ASIC came out of reset */
5779 for (i = 0; i < adev->usec_timeout; i++) {
5780 memsize = amdgpu_asic_get_config_memsize(adev);
5781
5782 if (memsize != 0xffffffff)
5783 break;
5784 udelay(1);
5785 }
5786 if (memsize == 0xffffffff) {
5787 r = -ETIME;
5788 goto out;
5789 }
5790
04442bf7
LL
5791 reset_context.method = AMD_RESET_METHOD_NONE;
5792 reset_context.reset_req_dev = adev;
5793 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5794 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5795
7afefb81 5796 adev->no_hw_access = true;
04442bf7 5797 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5798 adev->no_hw_access = false;
c9a6b82f
AG
5799 if (r)
5800 goto out;
5801
04442bf7 5802 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5803
5804out:
c9a6b82f 5805 if (!r) {
c1dd4aa6
AG
5806 if (amdgpu_device_cache_pci_state(adev->pdev))
5807 pci_restore_state(adev->pdev);
5808
c9a6b82f
AG
5809 DRM_INFO("PCIe error recovery succeeded\n");
5810 } else {
5811 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5812 amdgpu_device_unset_mp1_state(adev);
5813 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5814 }
5815
5816 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5817}
5818
5819/**
5820 * amdgpu_pci_resume() - resume normal ops after PCI reset
5821 * @pdev: pointer to PCI device
5822 *
 5823 * Called when the error recovery driver tells us that it's
505199a3 5824 * OK to resume normal operation.
c9a6b82f
AG
5825 */
5826void amdgpu_pci_resume(struct pci_dev *pdev)
5827{
5828 struct drm_device *dev = pci_get_drvdata(pdev);
5829 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5830 int i;
c9a6b82f 5831
c9a6b82f
AG
5832
5833 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5834
e17e27f9
GC
5835 /* Only continue execution for the case of pci_channel_io_frozen */
5836 if (adev->pci_channel_state != pci_channel_io_frozen)
5837 return;
5838
acd89fca
AG
5839 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5840 struct amdgpu_ring *ring = adev->rings[i];
5841
5842 if (!ring || !ring->sched.thread)
5843 continue;
5844
acd89fca
AG
5845 drm_sched_start(&ring->sched, true);
5846 }
5847
e923be99
AG
5848 amdgpu_device_unset_mp1_state(adev);
5849 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5850}
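
/*
 * The four callbacks above implement the standard PCI error recovery flow
 * (error_detected -> mmio_enabled / slot_reset -> resume). They are hooked
 * into the PCI core through a struct pci_error_handlers in the driver's
 * pci_driver registration, roughly as sketched below (see amdgpu_drv.c for
 * the actual definition; the name here is only an example).
 */
static const struct pci_error_handlers amdgpu_pci_err_handler_example = {
	.error_detected	= amdgpu_pci_error_detected,
	.mmio_enabled	= amdgpu_pci_mmio_enabled,
	.slot_reset	= amdgpu_pci_slot_reset,
	.resume		= amdgpu_pci_resume,
};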
c1dd4aa6
AG
5851
5852bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5853{
5854 struct drm_device *dev = pci_get_drvdata(pdev);
5855 struct amdgpu_device *adev = drm_to_adev(dev);
5856 int r;
5857
5858 r = pci_save_state(pdev);
5859 if (!r) {
5860 kfree(adev->pci_state);
5861
5862 adev->pci_state = pci_store_saved_state(pdev);
5863
5864 if (!adev->pci_state) {
5865 DRM_ERROR("Failed to store PCI saved state");
5866 return false;
5867 }
5868 } else {
5869 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5870 return false;
5871 }
5872
5873 return true;
5874}
5875
5876bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5877{
5878 struct drm_device *dev = pci_get_drvdata(pdev);
5879 struct amdgpu_device *adev = drm_to_adev(dev);
5880 int r;
5881
5882 if (!adev->pci_state)
5883 return false;
5884
5885 r = pci_load_saved_state(pdev, adev->pci_state);
5886
5887 if (!r) {
5888 pci_restore_state(pdev);
5889 } else {
5890 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5891 return false;
5892 }
5893
5894 return true;
5895}
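
/*
 * Illustrative sketch only: the cache/load helpers above are used as a pair
 * around events that clobber PCI config space, such as a full ASIC reset.
 * The wrapper below is hypothetical; the real pairing can be seen in the
 * slot_reset path earlier in this file.
 */
static void amdgpu_example_reset_with_pci_state(struct amdgpu_device *adev)
{
	/* Snapshot config space while the device is still healthy. */
	if (!amdgpu_device_cache_pci_state(adev->pdev))
		return;

	if (amdgpu_asic_reset(adev))
		return;

	/* Restore the snapshot so BARs and MSI setup are valid again. */
	amdgpu_device_load_pci_state(adev->pdev);
}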
5896
810085dd
EH
5897void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5898 struct amdgpu_ring *ring)
5899{
5900#ifdef CONFIG_X86_64
b818a5d3 5901 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5902 return;
5903#endif
5904 if (adev->gmc.xgmi.connected_to_cpu)
5905 return;
5906
5907 if (ring && ring->funcs->emit_hdp_flush)
5908 amdgpu_ring_emit_hdp_flush(ring);
5909 else
5910 amdgpu_asic_flush_hdp(adev, ring);
5911}
c1dd4aa6 5912
810085dd
EH
5913void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5914 struct amdgpu_ring *ring)
5915{
5916#ifdef CONFIG_X86_64
b818a5d3 5917 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5918 return;
5919#endif
5920 if (adev->gmc.xgmi.connected_to_cpu)
5921 return;
c1dd4aa6 5922
810085dd
EH
5923 amdgpu_asic_invalidate_hdp(adev, ring);
5924}
34f3a4a9 5925
89a7a870
AG
5926int amdgpu_in_reset(struct amdgpu_device *adev)
5927{
5928 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
5929}
5930
34f3a4a9
LY
5931/**
5932 * amdgpu_device_halt() - bring hardware to some kind of halt state
5933 *
5934 * @adev: amdgpu_device pointer
5935 *
5936 * Bring hardware to some kind of halt state so that no one can touch it
 5937 * any more. It helps to maintain the error context when an error occurs.
 5938 * Compared to a simple hang, the system stays stable at least for SSH
 5939 * access. It should then be trivial to inspect the hardware state and
 5940 * see what's going on. Implemented as follows:
5941 *
5942 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
5943 * clears all CPU mappings to device, disallows remappings through page faults
5944 * 2. amdgpu_irq_disable_all() disables all interrupts
5945 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 5946 * 4. set adev->no_hw_access to avoid potential crashes after step 5
5947 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5948 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5949 * flush any in flight DMA operations
5950 */
5951void amdgpu_device_halt(struct amdgpu_device *adev)
5952{
5953 struct pci_dev *pdev = adev->pdev;
e0f943b4 5954 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 5955
2c1c7ba4 5956 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
5957 drm_dev_unplug(ddev);
5958
5959 amdgpu_irq_disable_all(adev);
5960
5961 amdgpu_fence_driver_hw_fini(adev);
5962
5963 adev->no_hw_access = true;
5964
5965 amdgpu_device_unmap_mmio(adev);
5966
5967 pci_disable_device(pdev);
5968 pci_wait_for_pending_transaction(pdev);
5969}
86700a40
XD
5970
5971u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5972 u32 reg)
5973{
5974 unsigned long flags, address, data;
5975 u32 r;
5976
5977 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5978 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5979
5980 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5981 WREG32(address, reg * 4);
5982 (void)RREG32(address);
5983 r = RREG32(data);
5984 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5985 return r;
5986}
5987
5988void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5989 u32 reg, u32 v)
5990{
5991 unsigned long flags, address, data;
5992
5993 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5994 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5995
5996 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5997 WREG32(address, reg * 4);
5998 (void)RREG32(address);
5999 WREG32(data, v);
6000 (void)RREG32(data);
6001 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6002}
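
/*
 * Illustrative sketch only: the indirect PCIE port accessors above are
 * typically used for read-modify-write sequences on registers that are not
 * directly MMIO mapped. The helper below is hypothetical.
 */
static void amdgpu_example_pcie_port_rmw(struct amdgpu_device *adev,
					 u32 reg, u32 mask, u32 value)
{
	u32 tmp = amdgpu_device_pcie_port_rreg(adev, reg);

	tmp &= ~mask;
	tmp |= (value & mask);
	amdgpu_device_pcie_port_wreg(adev, reg, tmp);
}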
68ce8b24
CK
6003
6004/**
6005 * amdgpu_device_switch_gang - switch to a new gang
6006 * @adev: amdgpu_device pointer
6007 * @gang: the gang to switch to
6008 *
6009 * Try to switch to a new gang.
6010 * Returns: NULL if we switched to the new gang or a reference to the current
6011 * gang leader.
6012 */
6013struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6014 struct dma_fence *gang)
6015{
6016 struct dma_fence *old = NULL;
6017
6018 do {
6019 dma_fence_put(old);
6020 rcu_read_lock();
6021 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6022 rcu_read_unlock();
6023
6024 if (old == gang)
6025 break;
6026
6027 if (!dma_fence_is_signaled(old))
6028 return old;
6029
6030 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6031 old, gang) != old);
6032
6033 dma_fence_put(old);
6034 return NULL;
6035}
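
/*
 * Illustrative sketch only: a caller that must run as part of a new gang can
 * keep retrying until the switch succeeds, treating the returned old leader
 * as the fence to wait on. In the driver this is handled through the
 * scheduler's dependency mechanism rather than by blocking, so the helper
 * below is purely an example.
 */
static void amdgpu_example_join_gang(struct amdgpu_device *adev,
				     struct dma_fence *gang)
{
	struct dma_fence *old;

	while ((old = amdgpu_device_switch_gang(adev, gang))) {
		/* Wait for the current gang leader before trying again. */
		dma_fence_wait(old, false);
		dma_fence_put(old);
	}
}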
220c8cc8
AD
6036
6037bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6038{
6039 switch (adev->asic_type) {
6040#ifdef CONFIG_DRM_AMDGPU_SI
6041 case CHIP_HAINAN:
6042#endif
6043 case CHIP_TOPAZ:
6044 /* chips with no display hardware */
6045 return false;
6046#ifdef CONFIG_DRM_AMDGPU_SI
6047 case CHIP_TAHITI:
6048 case CHIP_PITCAIRN:
6049 case CHIP_VERDE:
6050 case CHIP_OLAND:
6051#endif
6052#ifdef CONFIG_DRM_AMDGPU_CIK
6053 case CHIP_BONAIRE:
6054 case CHIP_HAWAII:
6055 case CHIP_KAVERI:
6056 case CHIP_KABINI:
6057 case CHIP_MULLINS:
6058#endif
6059 case CHIP_TONGA:
6060 case CHIP_FIJI:
6061 case CHIP_POLARIS10:
6062 case CHIP_POLARIS11:
6063 case CHIP_POLARIS12:
6064 case CHIP_VEGAM:
6065 case CHIP_CARRIZO:
6066 case CHIP_STONEY:
6067 /* chips with display hardware */
6068 return true;
6069 default:
6070 /* IP discovery */
6071 if (!adev->ip_versions[DCE_HWIP][0] ||
6072 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6073 return false;
6074 return true;
6075 }
6076}
81283fee
JZ
6077
6078uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6079 uint32_t inst, uint32_t reg_addr, char reg_name[],
6080 uint32_t expected_value, uint32_t mask)
6081{
6082 uint32_t ret = 0;
6083 uint32_t old_ = 0;
6084 uint32_t tmp_ = RREG32(reg_addr);
6085 uint32_t loop = adev->usec_timeout;
6086
6087 while ((tmp_ & (mask)) != (expected_value)) {
6088 if (old_ != tmp_) {
6089 loop = adev->usec_timeout;
6090 old_ = tmp_;
6091 } else
6092 udelay(1);
6093 tmp_ = RREG32(reg_addr);
6094 loop--;
6095 if (!loop) {
 6096 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6097 inst, reg_name, (uint32_t)expected_value,
6098 (uint32_t)(tmp_ & (mask)));
6099 ret = -ETIMEDOUT;
6100 break;
6101 }
6102 }
6103 return ret;
6104}
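
/*
 * Illustrative sketch only: the polling helper above is typically used by IP
 * blocks to wait for a status bit after kicking off an operation. The
 * register offset, instance and bit below are made up for the example.
 */
static int amdgpu_example_wait_idle(struct amdgpu_device *adev)
{
	/* Wait until bit 0 of the (hypothetical) status register reads 0. */
	return amdgpu_device_wait_on_rreg(adev, 0, 0x1234, "EXAMPLE_STATUS",
					  0x0, 0x1);
}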