drm/amdgpu: Use the correct API to read register
[linux-2.6-block.git] drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
3d8785f6
SA
35#include <linux/devcoredump.h>
36#include <generated/utsrelease.h>
08a2fd23 37#include <linux/pci-p2pdma.h>
d37a3929 38#include <linux/apple-gmux.h>
fdf2f6c5 39
b7cdb41e 40#include <drm/drm_aperture.h>
4562236b 41#include <drm/drm_atomic_helper.h>
973ad627 42#include <drm/drm_crtc_helper.h>
45b64fd9 43#include <drm/drm_fb_helper.h>
fcd70cd3 44#include <drm/drm_probe_helper.h>
d38ceaf9
AD
45#include <drm/amdgpu_drm.h>
46#include <linux/vgaarb.h>
47#include <linux/vga_switcheroo.h>
48#include <linux/efi.h>
49#include "amdgpu.h"
f4b373f4 50#include "amdgpu_trace.h"
d38ceaf9
AD
51#include "amdgpu_i2c.h"
52#include "atom.h"
53#include "amdgpu_atombios.h"
a5bde2f9 54#include "amdgpu_atomfirmware.h"
d0dd7f0c 55#include "amd_pcie.h"
33f34802
KW
56#ifdef CONFIG_DRM_AMDGPU_SI
57#include "si.h"
58#endif
a2e73f56
AD
59#ifdef CONFIG_DRM_AMDGPU_CIK
60#include "cik.h"
61#endif
aaa36a97 62#include "vi.h"
460826e6 63#include "soc15.h"
0a5b8c7b 64#include "nv.h"
d38ceaf9 65#include "bif/bif_4_1_d.h"
bec86378 66#include <linux/firmware.h>
89041940 67#include "amdgpu_vf_error.h"
d38ceaf9 68
ba997709 69#include "amdgpu_amdkfd.h"
d2f52ac8 70#include "amdgpu_pm.h"
d38ceaf9 71
5183411b 72#include "amdgpu_xgmi.h"
c030f2e4 73#include "amdgpu_ras.h"
9c7c85f7 74#include "amdgpu_pmu.h"
bd607166 75#include "amdgpu_fru_eeprom.h"
04442bf7 76#include "amdgpu_reset.h"
5183411b 77
d5ea093e 78#include <linux/suspend.h>
c6a6e2db 79#include <drm/task_barrier.h>
3f12acc8 80#include <linux/pm_runtime.h>
d5ea093e 81
f89f8c6b
AG
82#include <drm/drm_drv.h>
83
3ad5dcfe
KHF
84#if IS_ENABLED(CONFIG_X86)
85#include <asm/intel-family.h>
86#endif
87
e2a75f88 88MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 89MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 90MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 91MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 92MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 93MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 94MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 95
2dc80b00 96#define AMDGPU_RESUME_MS 2000
7258fa31
SK
97#define AMDGPU_MAX_RETRY_LIMIT 2
98#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 99
b7cdb41e
ML
100static const struct drm_driver amdgpu_kms_driver;
101
050091ab 102const char *amdgpu_asic_name[] = {
da69c161
KW
103 "TAHITI",
104 "PITCAIRN",
105 "VERDE",
106 "OLAND",
107 "HAINAN",
d38ceaf9
AD
108 "BONAIRE",
109 "KAVERI",
110 "KABINI",
111 "HAWAII",
112 "MULLINS",
113 "TOPAZ",
114 "TONGA",
48299f95 115 "FIJI",
d38ceaf9 116 "CARRIZO",
139f4917 117 "STONEY",
2cc0c0b5
FC
118 "POLARIS10",
119 "POLARIS11",
c4642a47 120 "POLARIS12",
48ff108d 121 "VEGAM",
d4196f01 122 "VEGA10",
8fab806a 123 "VEGA12",
956fcddc 124 "VEGA20",
2ca8a5d2 125 "RAVEN",
d6c3b24e 126 "ARCTURUS",
1eee4228 127 "RENOIR",
d46b417a 128 "ALDEBARAN",
852a6626 129 "NAVI10",
d0f56dc2 130 "CYAN_SKILLFISH",
87dbad02 131 "NAVI14",
9802f5d7 132 "NAVI12",
ccaf72d3 133 "SIENNA_CICHLID",
ddd8fbe7 134 "NAVY_FLOUNDER",
4f1e9a76 135 "VANGOGH",
a2468e04 136 "DIMGREY_CAVEFISH",
6f169591 137 "BEIGE_GOBY",
ee9236b7 138 "YELLOW_CARP",
3ae695d6 139 "IP DISCOVERY",
d38ceaf9
AD
140 "LAST",
141};
142
dcea6e65
KR
143/**
144 * DOC: pcie_replay_count
145 *
146 * The amdgpu driver provides a sysfs API for reporting the total number
147 * of PCIe replays (NAKs).
148 * The file pcie_replay_count is used for this and returns the total
149 * number of replays as a sum of the NAKs generated and NAKs received.
150 */
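/*
 * Illustrative example (not part of the original file): from user space this
 * attribute is read through sysfs, e.g.
 *
 *	cat /sys/class/drm/card0/device/pcie_replay_count
 *
 * The card index depends on the system; the attribute lives under the PCI
 * device directory that the DRM node links to.
 */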
151
152static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 156 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
157 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158
36000c7a 159 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
160}
161
162static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
163 amdgpu_device_get_pcie_replay_count, NULL);
164
5494d864
AD
165static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
166
bd607166
KR
167/**
168 * DOC: product_name
169 *
170 * The amdgpu driver provides a sysfs API for reporting the product name
171 * for the device
2c496a6c 172 * The file product_name is used for this and returns the product name
bd607166
KR
173 * as returned from the FRU.
174 * NOTE: This is only available for certain server cards
175 */
176
177static ssize_t amdgpu_device_get_product_name(struct device *dev,
178 struct device_attribute *attr, char *buf)
179{
180 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 181 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 182
36000c7a 183 return sysfs_emit(buf, "%s\n", adev->product_name);
bd607166
KR
184}
185
186static DEVICE_ATTR(product_name, S_IRUGO,
187 amdgpu_device_get_product_name, NULL);
188
189/**
190 * DOC: product_number
191 *
192 * The amdgpu driver provides a sysfs API for reporting the part number
193 * for the device
2c496a6c 194 * The file product_number is used for this and returns the part number
bd607166
KR
195 * as returned from the FRU.
196 * NOTE: This is only available for certain server cards
197 */
198
199static ssize_t amdgpu_device_get_product_number(struct device *dev,
200 struct device_attribute *attr, char *buf)
201{
202 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 203 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 204
36000c7a 205 return sysfs_emit(buf, "%s\n", adev->product_number);
bd607166
KR
206}
207
208static DEVICE_ATTR(product_number, S_IRUGO,
209 amdgpu_device_get_product_number, NULL);
210
211/**
212 * DOC: serial_number
213 *
214 * The amdgpu driver provides a sysfs API for reporting the serial number
215 * for the device
216 * The file serial_number is used for this and returns the serial number
217 * as returned from the FRU.
218 * NOTE: This is only available for certain server cards
219 */
220
221static ssize_t amdgpu_device_get_serial_number(struct device *dev,
222 struct device_attribute *attr, char *buf)
223{
224 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 225 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 226
36000c7a 227 return sysfs_emit(buf, "%s\n", adev->serial);
bd607166
KR
228}
229
230static DEVICE_ATTR(serial_number, S_IRUGO,
231 amdgpu_device_get_serial_number, NULL);
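/*
 * Illustrative example (assumption: a server card that carries FRU data): the
 * three FRU-backed attributes above can be read the same way, e.g.
 *
 *	cat /sys/class/drm/card0/device/product_name
 *	cat /sys/class/drm/card0/device/serial_number
 *
 * On cards without FRU data the reported values may be empty.
 */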
232
fd496ca8 233/**
b98c6299 234 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
235 *
236 * @dev: drm_device pointer
237 *
b98c6299 238 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
239 * otherwise return false.
240 */
b98c6299 241bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
242{
243 struct amdgpu_device *adev = drm_to_adev(dev);
244
b98c6299 245 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
246 return true;
247 return false;
248}
249
e3ecdffa 250/**
0330b848 251 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
252 *
253 * @dev: drm_device pointer
254 *
b98c6299 255 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
256 * otherwise return false.
257 */
31af062a 258bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 259{
1348969a 260 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 261
b98c6299
AD
262 if (adev->has_pr3 ||
263 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
264 return true;
265 return false;
266}
267
a69cba42
AD
268/**
269 * amdgpu_device_supports_baco - Does the device support BACO
270 *
271 * @dev: drm_device pointer
272 *
273 * Returns true if the device supports BACO,
274 * otherwise return false.
275 */
276bool amdgpu_device_supports_baco(struct drm_device *dev)
277{
1348969a 278 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
279
280 return amdgpu_asic_supports_baco(adev);
281}
282
3fa8f89d
S
283/**
284 * amdgpu_device_supports_smart_shift - Is the device dGPU with
285 * smart shift support
286 *
287 * @dev: drm_device pointer
288 *
289 * Returns true if the device is a dGPU with Smart Shift support,
290 * otherwise returns false.
291 */
292bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
293{
294 return (amdgpu_device_supports_boco(dev) &&
295 amdgpu_acpi_is_power_shift_control_supported());
296}
297
6e3cd2a9
MCC
298/*
299 * VRAM access helper functions
300 */
301
e35e2b11 302/**
048af66b 303 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
304 *
305 * @adev: amdgpu_device pointer
306 * @pos: offset of the buffer in vram
307 * @buf: virtual address of the buffer in system memory
308 * @size: read/write size, sizeof(@buf) must be > @size
309 * @write: true - write to vram, otherwise - read from vram
310 */
048af66b
KW
311void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
312 void *buf, size_t size, bool write)
e35e2b11 313{
e35e2b11 314 unsigned long flags;
048af66b
KW
315 uint32_t hi = ~0, tmp = 0;
316 uint32_t *data = buf;
ce05ac56 317 uint64_t last;
f89f8c6b 318 int idx;
ce05ac56 319
c58a863b 320 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 321 return;
9d11eb0d 322
048af66b
KW
323 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
324
325 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
326 for (last = pos + size; pos < last; pos += 4) {
327 tmp = pos >> 31;
328
329 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
330 if (tmp != hi) {
331 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
332 hi = tmp;
333 }
334 if (write)
335 WREG32_NO_KIQ(mmMM_DATA, *data++);
336 else
337 *data++ = RREG32_NO_KIQ(mmMM_DATA);
338 }
339
340 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
341 drm_dev_exit(idx);
342}
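/*
 * Illustrative call (not part of the original file): read 16 bytes from the
 * start of VRAM into a local buffer through the MM_INDEX/MM_DATA window.
 *
 *	u32 buf[4];
 *
 *	amdgpu_device_mm_access(adev, 0, buf, sizeof(buf), false);
 */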
343
344/**
bbe04dec 345 * amdgpu_device_aper_access - access vram by vram aperature
048af66b
KW
346 *
347 * @adev: amdgpu_device pointer
348 * @pos: offset of the buffer in vram
349 * @buf: virtual address of the buffer in system memory
350 * @size: read/write size, sizeof(@buf) must be > @size
351 * @write: true - write to vram, otherwise - read from vram
352 *
353 * Returns the number of bytes that have been transferred.
354 */
355size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
356 void *buf, size_t size, bool write)
357{
9d11eb0d 358#ifdef CONFIG_64BIT
048af66b
KW
359 void __iomem *addr;
360 size_t count = 0;
361 uint64_t last;
362
363 if (!adev->mman.aper_base_kaddr)
364 return 0;
365
9d11eb0d
CK
366 last = min(pos + size, adev->gmc.visible_vram_size);
367 if (last > pos) {
048af66b
KW
368 addr = adev->mman.aper_base_kaddr + pos;
369 count = last - pos;
9d11eb0d
CK
370
371 if (write) {
372 memcpy_toio(addr, buf, count);
373 mb();
810085dd 374 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 375 } else {
810085dd 376 amdgpu_device_invalidate_hdp(adev, NULL);
9d11eb0d
CK
377 mb();
378 memcpy_fromio(buf, addr, count);
379 }
380
9d11eb0d 381 }
048af66b
KW
382
383 return count;
384#else
385 return 0;
9d11eb0d 386#endif
048af66b 387}
9d11eb0d 388
048af66b
KW
389/**
390 * amdgpu_device_vram_access - read/write a buffer in vram
391 *
392 * @adev: amdgpu_device pointer
393 * @pos: offset of the buffer in vram
394 * @buf: virtual address of the buffer in system memory
395 * @size: read/write size, sizeof(@buf) must be > @size
396 * @write: true - write to vram, otherwise - read from vram
397 */
398void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
399 void *buf, size_t size, bool write)
400{
401 size_t count;
e35e2b11 402
048af66b
KW
403 /* try using the vram aperture to access vram first */
404 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
405 size -= count;
406 if (size) {
407 /* use MM to access the rest of vram */
408 pos += count;
409 buf += count;
410 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
411 }
412}
413
d38ceaf9 414/*
f7ee1874 415 * register access helper functions.
d38ceaf9 416 */
56b53c0b
DL
417
418/* Check if hw access should be skipped because of hotplug or device error */
419bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
420{
7afefb81 421 if (adev->no_hw_access)
56b53c0b
DL
422 return true;
423
424#ifdef CONFIG_LOCKDEP
425 /*
426 * This is a bit complicated to understand, so worth a comment. What we assert
427 * here is that the GPU reset is not running on another thread in parallel.
428 *
429 * For this we trylock the read side of the reset semaphore; if that succeeds
430 * we know that the reset is not running in parallel.
431 *
432 * If the trylock fails we assert that we are either already holding the read
433 * side of the lock or are the reset thread itself and hold the write side of
434 * the lock.
435 */
436 if (in_task()) {
d0fb18b5
AG
437 if (down_read_trylock(&adev->reset_domain->sem))
438 up_read(&adev->reset_domain->sem);
56b53c0b 439 else
d0fb18b5 440 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
441 }
442#endif
443 return false;
444}
445
e3ecdffa 446/**
f7ee1874 447 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
448 *
449 * @adev: amdgpu_device pointer
450 * @reg: dword aligned register offset
451 * @acc_flags: access flags which require special behavior
452 *
453 * Returns the 32 bit value from the offset specified.
454 */
f7ee1874
HZ
455uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
456 uint32_t reg, uint32_t acc_flags)
d38ceaf9 457{
f4b373f4
TSD
458 uint32_t ret;
459
56b53c0b 460 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
461 return 0;
462
f7ee1874
HZ
463 if ((reg * 4) < adev->rmmio_size) {
464 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
465 amdgpu_sriov_runtime(adev) &&
d0fb18b5 466 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 467 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 468 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
469 } else {
470 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
471 }
472 } else {
473 ret = adev->pcie_rreg(adev, reg * 4);
81202807 474 }
bc992ba5 475
f7ee1874 476 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 477
f4b373f4 478 return ret;
d38ceaf9
AD
479}
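/*
 * Illustrative note: most callers do not use amdgpu_device_rreg()/
 * amdgpu_device_wreg() directly but go through the RREG32()/WREG32() macros,
 * e.g. a read-modify-write of a register:
 *
 *	tmp = RREG32(reg);
 *	tmp |= mask;
 *	WREG32(reg, tmp);
 */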
480
421a2a30
ML
481/*
482 * MMIO register read with bytes helper functions
483 * @offset: byte offset from MMIO start
484 *
485*/
486
e3ecdffa
AD
487/**
488 * amdgpu_mm_rreg8 - read a memory mapped IO register
489 *
490 * @adev: amdgpu_device pointer
491 * @offset: byte aligned register offset
492 *
493 * Returns the 8 bit value from the offset specified.
494 */
7cbbc745
AG
495uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
496{
56b53c0b 497 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
498 return 0;
499
421a2a30
ML
500 if (offset < adev->rmmio_size)
501 return (readb(adev->rmmio + offset));
502 BUG();
503}
504
505/*
506 * MMIO register write with bytes helper functions
507 * @offset: byte offset from MMIO start
508 * @value: the value to be written to the register
509 *
510*/
e3ecdffa
AD
511/**
512 * amdgpu_mm_wreg8 - write a memory mapped IO register
513 *
514 * @adev: amdgpu_device pointer
515 * @offset: byte aligned register offset
516 * @value: 8 bit value to write
517 *
518 * Writes the value specified to the offset specified.
519 */
7cbbc745
AG
520void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
521{
56b53c0b 522 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
523 return;
524
421a2a30
ML
525 if (offset < adev->rmmio_size)
526 writeb(value, adev->rmmio + offset);
527 else
528 BUG();
529}
530
e3ecdffa 531/**
f7ee1874 532 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
533 *
534 * @adev: amdgpu_device pointer
535 * @reg: dword aligned register offset
536 * @v: 32 bit value to write to the register
537 * @acc_flags: access flags which require special behavior
538 *
539 * Writes the value specified to the offset specified.
540 */
f7ee1874
HZ
541void amdgpu_device_wreg(struct amdgpu_device *adev,
542 uint32_t reg, uint32_t v,
543 uint32_t acc_flags)
d38ceaf9 544{
56b53c0b 545 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
546 return;
547
f7ee1874
HZ
548 if ((reg * 4) < adev->rmmio_size) {
549 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
550 amdgpu_sriov_runtime(adev) &&
d0fb18b5 551 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 552 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 553 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
554 } else {
555 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
556 }
557 } else {
558 adev->pcie_wreg(adev, reg * 4, v);
81202807 559 }
bc992ba5 560
f7ee1874 561 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 562}
d38ceaf9 563
03f2abb0 564/**
4cc9f86f 565 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 566 *
71579346
RB
567 * @adev: amdgpu_device pointer
568 * @reg: mmio/rlc register
569 * @v: value to write
570 *
571 * This function is invoked only for debugfs register access.
03f2abb0 572 */
f7ee1874
HZ
573void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
574 uint32_t reg, uint32_t v)
2e0cc4d4 575{
56b53c0b 576 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
577 return;
578
2e0cc4d4 579 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
580 adev->gfx.rlc.funcs &&
581 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 582 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
1b2dc99e 583 return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
4cc9f86f
TSD
584 } else if ((reg * 4) >= adev->rmmio_size) {
585 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
586 } else {
587 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 588 }
d38ceaf9
AD
589}
590
d38ceaf9
AD
591/**
592 * amdgpu_mm_rdoorbell - read a doorbell dword
593 *
594 * @adev: amdgpu_device pointer
595 * @index: doorbell index
596 *
597 * Returns the value in the doorbell aperture at the
598 * requested doorbell index (CIK).
599 */
600u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
601{
56b53c0b 602 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
603 return 0;
604
0512e9ff 605 if (index < adev->doorbell.num_kernel_doorbells) {
d38ceaf9
AD
606 return readl(adev->doorbell.ptr + index);
607 } else {
608 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
609 return 0;
610 }
611}
612
613/**
614 * amdgpu_mm_wdoorbell - write a doorbell dword
615 *
616 * @adev: amdgpu_device pointer
617 * @index: doorbell index
618 * @v: value to write
619 *
620 * Writes @v to the doorbell aperture at the
621 * requested doorbell index (CIK).
622 */
623void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
624{
56b53c0b 625 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
626 return;
627
0512e9ff 628 if (index < adev->doorbell.num_kernel_doorbells) {
d38ceaf9
AD
629 writel(v, adev->doorbell.ptr + index);
630 } else {
631 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
632 }
633}
634
832be404
KW
635/**
636 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
637 *
638 * @adev: amdgpu_device pointer
639 * @index: doorbell index
640 *
641 * Returns the value in the doorbell aperture at the
642 * requested doorbell index (VEGA10+).
643 */
644u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
645{
56b53c0b 646 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
647 return 0;
648
0512e9ff 649 if (index < adev->doorbell.num_kernel_doorbells) {
832be404
KW
650 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
651 } else {
652 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
653 return 0;
654 }
655}
656
657/**
658 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
659 *
660 * @adev: amdgpu_device pointer
661 * @index: doorbell index
662 * @v: value to write
663 *
664 * Writes @v to the doorbell aperture at the
665 * requested doorbell index (VEGA10+).
666 */
667void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
668{
56b53c0b 669 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
670 return;
671
0512e9ff 672 if (index < adev->doorbell.num_kernel_doorbells) {
832be404
KW
673 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
674 } else {
675 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
676 }
677}
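/*
 * Illustrative note: ring code normally uses the RDOORBELL32()/WDOORBELL32()
 * and RDOORBELL64()/WDOORBELL64() macros, which wrap the helpers above, e.g.
 * when committing a new write pointer:
 *
 *	WDOORBELL64(ring->doorbell_index, ring->wptr);
 */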
678
1bba3683
HZ
679/**
680 * amdgpu_device_indirect_rreg - read an indirect register
681 *
682 * @adev: amdgpu_device pointer
22f453fb 683 * @reg_addr: indirect register address to read from
1bba3683
HZ
684 *
685 * Returns the value of indirect register @reg_addr
686 */
687u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
688 u32 reg_addr)
689{
65ba96e9 690 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
691 void __iomem *pcie_index_offset;
692 void __iomem *pcie_data_offset;
65ba96e9
HZ
693 u32 r;
694
695 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
696 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
697
698 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
699 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
700 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
701
702 writel(reg_addr, pcie_index_offset);
703 readl(pcie_index_offset);
704 r = readl(pcie_data_offset);
705 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
706
707 return r;
708}
709
710/**
711 * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
712 *
713 * @adev: amdgpu_device pointer
22f453fb 714 * @reg_addr: indirect register address to read from
1bba3683
HZ
715 *
716 * Returns the value of indirect register @reg_addr
717 */
718u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
719 u32 reg_addr)
720{
65ba96e9 721 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
722 void __iomem *pcie_index_offset;
723 void __iomem *pcie_data_offset;
65ba96e9
HZ
724 u64 r;
725
726 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
727 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
728
729 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
730 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
731 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
732
733 /* read low 32 bits */
734 writel(reg_addr, pcie_index_offset);
735 readl(pcie_index_offset);
736 r = readl(pcie_data_offset);
737 /* read high 32 bits */
738 writel(reg_addr + 4, pcie_index_offset);
739 readl(pcie_index_offset);
740 r |= ((u64)readl(pcie_data_offset) << 32);
741 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
742
743 return r;
744}
745
746/**
747 * amdgpu_device_indirect_wreg - write an indirect register
748 *
749 * @adev: amdgpu_device pointer
752 * @reg_addr: indirect register offset
753 * @reg_data: indirect register data
754 *
755 */
756void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
757 u32 reg_addr, u32 reg_data)
758{
65ba96e9 759 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
760 void __iomem *pcie_index_offset;
761 void __iomem *pcie_data_offset;
762
65ba96e9
HZ
763 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
764 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
765
1bba3683
HZ
766 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
767 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
768 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
769
770 writel(reg_addr, pcie_index_offset);
771 readl(pcie_index_offset);
772 writel(reg_data, pcie_data_offset);
773 readl(pcie_data_offset);
774 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
775}
776
777/**
778 * amdgpu_device_indirect_wreg64 - write a 64 bit indirect register
779 *
780 * @adev: amdgpu_device pointer
783 * @reg_addr: indirect register offset
784 * @reg_data: indirect register data
785 *
786 */
787void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
788 u32 reg_addr, u64 reg_data)
789{
65ba96e9 790 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
791 void __iomem *pcie_index_offset;
792 void __iomem *pcie_data_offset;
793
65ba96e9
HZ
794 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
795 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
796
1bba3683
HZ
797 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
798 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
799 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
800
801 /* write low 32 bits */
802 writel(reg_addr, pcie_index_offset);
803 readl(pcie_index_offset);
804 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
805 readl(pcie_data_offset);
806 /* write high 32 bits */
807 writel(reg_addr + 4, pcie_index_offset);
808 readl(pcie_index_offset);
809 writel((u32)(reg_data >> 32), pcie_data_offset);
810 readl(pcie_data_offset);
811 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
812}
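/*
 * Illustrative note (an assumption, not spelled out in this file): asic code
 * typically installs these helpers as the adev->pcie_rreg/pcie_wreg and
 * adev->pcie_rreg64/pcie_wreg64 callbacks used for out-of-range register
 * accesses, e.g.
 *
 *	adev->pcie_rreg = &amdgpu_device_indirect_rreg;
 *	adev->pcie_wreg = &amdgpu_device_indirect_wreg;
 */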
813
dabc114e
HZ
814/**
815 * amdgpu_device_get_rev_id - query device rev_id
816 *
817 * @adev: amdgpu_device pointer
818 *
819 * Return device rev_id
820 */
821u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
822{
823 return adev->nbio.funcs->get_rev_id(adev);
824}
825
d38ceaf9
AD
826/**
827 * amdgpu_invalid_rreg - dummy reg read function
828 *
982a820b 829 * @adev: amdgpu_device pointer
d38ceaf9
AD
830 * @reg: offset of register
831 *
832 * Dummy register read function. Used for register blocks
833 * that certain asics don't have (all asics).
834 * Returns the value in the register.
835 */
836static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
837{
838 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
839 BUG();
840 return 0;
841}
842
843/**
844 * amdgpu_invalid_wreg - dummy reg write function
845 *
982a820b 846 * @adev: amdgpu_device pointer
d38ceaf9
AD
847 * @reg: offset of register
848 * @v: value to write to the register
849 *
850 * Dummy register write function. Used for register blocks
851 * that certain asics don't have (all asics).
852 */
853static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
854{
855 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
856 reg, v);
857 BUG();
858}
859
4fa1c6a6
TZ
860/**
861 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
862 *
982a820b 863 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
864 * @reg: offset of register
865 *
866 * Dummy register read function. Used for register blocks
867 * that certain asics don't have (all asics).
868 * Returns the value in the register.
869 */
870static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
871{
872 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
873 BUG();
874 return 0;
875}
876
877/**
878 * amdgpu_invalid_wreg64 - dummy reg write function
879 *
982a820b 880 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
881 * @reg: offset of register
882 * @v: value to write to the register
883 *
884 * Dummy register write function. Used for register blocks
885 * that certain asics don't have (all asics).
886 */
887static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
888{
889 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
890 reg, v);
891 BUG();
892}
893
d38ceaf9
AD
894/**
895 * amdgpu_block_invalid_rreg - dummy reg read function
896 *
982a820b 897 * @adev: amdgpu_device pointer
d38ceaf9
AD
898 * @block: offset of instance
899 * @reg: offset of register
900 *
901 * Dummy register read function. Used for register blocks
902 * that certain asics don't have (all asics).
903 * Returns the value in the register.
904 */
905static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
906 uint32_t block, uint32_t reg)
907{
908 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
909 reg, block);
910 BUG();
911 return 0;
912}
913
914/**
915 * amdgpu_block_invalid_wreg - dummy reg write function
916 *
982a820b 917 * @adev: amdgpu_device pointer
d38ceaf9
AD
918 * @block: offset of instance
919 * @reg: offset of register
920 * @v: value to write to the register
921 *
922 * Dummy register write function. Used for register blocks
923 * that certain asics don't have (all asics).
924 */
925static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
926 uint32_t block,
927 uint32_t reg, uint32_t v)
928{
929 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
930 reg, block, v);
931 BUG();
932}
933
4d2997ab
AD
934/**
935 * amdgpu_device_asic_init - Wrapper for atom asic_init
936 *
982a820b 937 * @adev: amdgpu_device pointer
4d2997ab
AD
938 *
939 * Does any asic specific work and then calls atom asic init.
940 */
941static int amdgpu_device_asic_init(struct amdgpu_device *adev)
942{
943 amdgpu_asic_pre_asic_init(adev);
944
85d1bcc6
HZ
945 if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
946 return amdgpu_atomfirmware_asic_init(adev, true);
947 else
948 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
4d2997ab
AD
949}
950
e3ecdffa 951/**
7ccfd79f 952 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 953 *
982a820b 954 * @adev: amdgpu_device pointer
e3ecdffa
AD
955 *
956 * Allocates a scratch page of VRAM for use by various things in the
957 * driver.
958 */
7ccfd79f 959static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 960{
7ccfd79f
CK
961 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
962 AMDGPU_GEM_DOMAIN_VRAM |
963 AMDGPU_GEM_DOMAIN_GTT,
964 &adev->mem_scratch.robj,
965 &adev->mem_scratch.gpu_addr,
966 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
967}
968
e3ecdffa 969/**
7ccfd79f 970 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 971 *
982a820b 972 * @adev: amdgpu_device pointer
e3ecdffa
AD
973 *
974 * Frees the VRAM scratch page.
975 */
7ccfd79f 976static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 977{
7ccfd79f 978 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
979}
980
981/**
9c3f2b54 982 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
983 *
984 * @adev: amdgpu_device pointer
985 * @registers: pointer to the register array
986 * @array_size: size of the register array
987 *
988 * Programs an array of registers with AND and OR masks.
989 * This is a helper for setting golden registers.
990 */
9c3f2b54
AD
991void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
992 const u32 *registers,
993 const u32 array_size)
d38ceaf9
AD
994{
995 u32 tmp, reg, and_mask, or_mask;
996 int i;
997
998 if (array_size % 3)
999 return;
1000
47fc644f 1001 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1002 reg = registers[i + 0];
1003 and_mask = registers[i + 1];
1004 or_mask = registers[i + 2];
1005
1006 if (and_mask == 0xffffffff) {
1007 tmp = or_mask;
1008 } else {
1009 tmp = RREG32(reg);
1010 tmp &= ~and_mask;
e0d07657
HZ
1011 if (adev->family >= AMDGPU_FAMILY_AI)
1012 tmp |= (or_mask & and_mask);
1013 else
1014 tmp |= or_mask;
d38ceaf9
AD
1015 }
1016 WREG32(reg, tmp);
1017 }
1018}
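/*
 * Illustrative example (the register name is a placeholder): golden register
 * tables are flat arrays of {offset, AND mask, OR value} triplets:
 *
 *	static const u32 golden_settings[] = {
 *		mmSOME_REG, 0xffffffff, 0x00000001,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, golden_settings,
 *						ARRAY_SIZE(golden_settings));
 */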
1019
e3ecdffa
AD
1020/**
1021 * amdgpu_device_pci_config_reset - reset the GPU
1022 *
1023 * @adev: amdgpu_device pointer
1024 *
1025 * Resets the GPU using the pci config reset sequence.
1026 * Only applicable to asics prior to vega10.
1027 */
8111c387 1028void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1029{
1030 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1031}
1032
af484df8
AD
1033/**
1034 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1035 *
1036 * @adev: amdgpu_device pointer
1037 *
1038 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1039 */
1040int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1041{
1042 return pci_reset_function(adev->pdev);
1043}
1044
d38ceaf9
AD
1045/*
1046 * GPU doorbell aperture helpers function.
1047 */
1048/**
06ec9070 1049 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
1050 *
1051 * @adev: amdgpu_device pointer
1052 *
1053 * Init doorbell driver information (CIK)
1054 * Returns 0 on success, error on failure.
1055 */
06ec9070 1056static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 1057{
6585661d 1058
705e519e
CK
1059 /* No doorbell on SI hardware generation */
1060 if (adev->asic_type < CHIP_BONAIRE) {
1061 adev->doorbell.base = 0;
1062 adev->doorbell.size = 0;
0512e9ff 1063 adev->doorbell.num_kernel_doorbells = 0;
705e519e
CK
1064 adev->doorbell.ptr = NULL;
1065 return 0;
1066 }
1067
d6895ad3
CK
1068 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1069 return -EINVAL;
1070
22357775
AD
1071 amdgpu_asic_init_doorbell_index(adev);
1072
d38ceaf9
AD
1073 /* doorbell bar mapping */
1074 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1075 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1076
de33a329 1077 if (adev->enable_mes) {
0512e9ff 1078 adev->doorbell.num_kernel_doorbells =
de33a329
JX
1079 adev->doorbell.size / sizeof(u32);
1080 } else {
0512e9ff 1081 adev->doorbell.num_kernel_doorbells =
de33a329
JX
1082 min_t(u32, adev->doorbell.size / sizeof(u32),
1083 adev->doorbell_index.max_assignment+1);
0512e9ff 1084 if (adev->doorbell.num_kernel_doorbells == 0)
de33a329
JX
1085 return -EINVAL;
1086
1087 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
1088 * paging queue doorbell use the second page. The
1089 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
1090 * doorbells are in the first page. So with paging queue enabled,
0512e9ff 1091 * the max num_kernel_doorbells should + 1 page (0x400 in dword)
de33a329
JX
1092 */
1093 if (adev->asic_type >= CHIP_VEGA10)
0512e9ff 1094 adev->doorbell.num_kernel_doorbells += 0x400;
de33a329 1095 }
ec3db8a6 1096
8972e5d2 1097 adev->doorbell.ptr = ioremap(adev->doorbell.base,
0512e9ff 1098 adev->doorbell.num_kernel_doorbells *
8972e5d2
CK
1099 sizeof(u32));
1100 if (adev->doorbell.ptr == NULL)
d38ceaf9 1101 return -ENOMEM;
d38ceaf9
AD
1102
1103 return 0;
1104}
1105
1106/**
06ec9070 1107 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
1108 *
1109 * @adev: amdgpu_device pointer
1110 *
1111 * Tear down doorbell driver information (CIK)
1112 */
06ec9070 1113static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1114{
1115 iounmap(adev->doorbell.ptr);
1116 adev->doorbell.ptr = NULL;
1117}
1118
22cb0164 1119
d38ceaf9
AD
1120
1121/*
06ec9070 1122 * amdgpu_device_wb_*()
455a7bc2 1123 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1124 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
1125 */
1126
1127/**
06ec9070 1128 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1129 *
1130 * @adev: amdgpu_device pointer
1131 *
1132 * Disables Writeback and frees the Writeback memory (all asics).
1133 * Used at driver shutdown.
1134 */
06ec9070 1135static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1136{
1137 if (adev->wb.wb_obj) {
a76ed485
AD
1138 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1139 &adev->wb.gpu_addr,
1140 (void **)&adev->wb.wb);
d38ceaf9
AD
1141 adev->wb.wb_obj = NULL;
1142 }
1143}
1144
1145/**
03f2abb0 1146 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1147 *
1148 * @adev: amdgpu_device pointer
1149 *
455a7bc2 1150 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1151 * Used at driver startup.
1152 * Returns 0 on success or a negative error code on failure.
1153 */
06ec9070 1154static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1155{
1156 int r;
1157
1158 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1159 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1160 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1161 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1162 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1163 (void **)&adev->wb.wb);
d38ceaf9
AD
1164 if (r) {
1165 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1166 return r;
1167 }
d38ceaf9
AD
1168
1169 adev->wb.num_wb = AMDGPU_MAX_WB;
1170 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1171
1172 /* clear wb memory */
73469585 1173 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1174 }
1175
1176 return 0;
1177}
1178
1179/**
131b4b36 1180 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1181 *
1182 * @adev: amdgpu_device pointer
1183 * @wb: wb index
1184 *
1185 * Allocate a wb slot for use by the driver (all asics).
1186 * Returns 0 on success or -EINVAL on failure.
1187 */
131b4b36 1188int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1189{
1190 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1191
97407b63 1192 if (offset < adev->wb.num_wb) {
7014285a 1193 __set_bit(offset, adev->wb.used);
63ae07ca 1194 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1195 return 0;
1196 } else {
1197 return -EINVAL;
1198 }
1199}
1200
d38ceaf9 1201/**
131b4b36 1202 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1203 *
1204 * @adev: amdgpu_device pointer
1205 * @wb: wb index
1206 *
1207 * Free a wb slot allocated for use by the driver (all asics)
1208 */
131b4b36 1209void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1210{
73469585 1211 wb >>= 3;
d38ceaf9 1212 if (wb < adev->wb.num_wb)
73469585 1213 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1214}
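/*
 * Usage sketch (illustrative): rings typically reserve writeback slots at
 * init time and release them on teardown, e.g.
 *
 *	r = amdgpu_device_wb_get(adev, &ring->rptr_offs);
 *	if (r)
 *		return r;
 *	...
 *	amdgpu_device_wb_free(adev, ring->rptr_offs);
 */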
1215
d6895ad3
CK
1216/**
1217 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1218 *
1219 * @adev: amdgpu_device pointer
1220 *
1221 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1222 * to fail, but if any of the BARs is not accessible after the resize we abort
1223 * driver loading by returning -ENODEV.
1224 */
1225int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1226{
453f617a 1227 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1228 struct pci_bus *root;
1229 struct resource *res;
1230 unsigned i;
d6895ad3
CK
1231 u16 cmd;
1232 int r;
1233
0c03b912 1234 /* Bypass for VF */
1235 if (amdgpu_sriov_vf(adev))
1236 return 0;
1237
b7221f2b
AD
1238 /* skip if the bios has already enabled large BAR */
1239 if (adev->gmc.real_vram_size &&
1240 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1241 return 0;
1242
31b8adab
CK
1243 /* Check if the root BUS has 64bit memory resources */
1244 root = adev->pdev->bus;
1245 while (root->parent)
1246 root = root->parent;
1247
1248 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1249 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1250 res->start > 0x100000000ull)
1251 break;
1252 }
1253
1254 /* Trying to resize is pointless without a root hub window above 4GB */
1255 if (!res)
1256 return 0;
1257
453f617a
ND
1258 /* Limit the BAR size to what is available */
1259 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1260 rbar_size);
1261
d6895ad3
CK
1262 /* Disable memory decoding while we change the BAR addresses and size */
1263 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1264 pci_write_config_word(adev->pdev, PCI_COMMAND,
1265 cmd & ~PCI_COMMAND_MEMORY);
1266
1267 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 1268 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
1269 if (adev->asic_type >= CHIP_BONAIRE)
1270 pci_release_resource(adev->pdev, 2);
1271
1272 pci_release_resource(adev->pdev, 0);
1273
1274 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1275 if (r == -ENOSPC)
1276 DRM_INFO("Not enough PCI address space for a large BAR.");
1277 else if (r && r != -ENOTSUPP)
1278 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1279
1280 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1281
1282 /* When the doorbell or fb BAR isn't available we have no chance of
1283 * using the device.
1284 */
06ec9070 1285 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
1286 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1287 return -ENODEV;
1288
1289 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1290
1291 return 0;
1292}
a05502e5 1293
d38ceaf9
AD
1294/*
1295 * GPU helpers function.
1296 */
1297/**
39c640c0 1298 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1299 *
1300 * @adev: amdgpu_device pointer
1301 *
c836fec5
JQ
1302 * Check if the asic has been initialized (all asics) at driver startup
1303 * or post is needed if hw reset is performed.
1304 * Returns true if post is needed, false if not.
d38ceaf9 1305 */
39c640c0 1306bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1307{
1308 uint32_t reg;
1309
bec86378
ML
1310 if (amdgpu_sriov_vf(adev))
1311 return false;
1312
1313 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1314 /* for FIJI: In the whole GPU pass-through virtualization case, after a VM
1315 * reboot some old SMC firmware still needs the driver to do vPost, otherwise
1316 * the GPU hangs. SMC firmware versions above 22.15 don't have this flaw, so
1317 * force vPost for SMC versions below 22.15
bec86378
ML
1318 */
1319 if (adev->asic_type == CHIP_FIJI) {
1320 int err;
1321 uint32_t fw_ver;
1322 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1323 /* force vPost if an error occurred */
1324 if (err)
1325 return true;
1326
1327 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1328 if (fw_ver < 0x00160e00)
1329 return true;
bec86378 1330 }
bec86378 1331 }
91fe77eb 1332
e3c1b071 1333 /* Don't post if we need to reset whole hive on init */
1334 if (adev->gmc.xgmi.pending_reset)
1335 return false;
1336
91fe77eb 1337 if (adev->has_hw_reset) {
1338 adev->has_hw_reset = false;
1339 return true;
1340 }
1341
1342 /* bios scratch used on CIK+ */
1343 if (adev->asic_type >= CHIP_BONAIRE)
1344 return amdgpu_atombios_scratch_need_asic_init(adev);
1345
1346 /* check MEM_SIZE for older asics */
1347 reg = amdgpu_asic_get_config_memsize(adev);
1348
1349 if ((reg != 0) && (reg != 0xffffffff))
1350 return false;
1351
1352 return true;
bec86378
ML
1353}
1354
0ab5d711
ML
1355/**
1356 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1357 *
1358 * @adev: amdgpu_device pointer
1359 *
1360 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1361 * be set for this device.
1362 *
1363 * Returns true if it should be used or false if not.
1364 */
1365bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1366{
1367 switch (amdgpu_aspm) {
1368 case -1:
1369 break;
1370 case 0:
1371 return false;
1372 case 1:
1373 return true;
1374 default:
1375 return false;
1376 }
1377 return pcie_aspm_enabled(adev->pdev);
1378}
1379
3ad5dcfe
KHF
1380bool amdgpu_device_aspm_support_quirk(void)
1381{
1382#if IS_ENABLED(CONFIG_X86)
1383 struct cpuinfo_x86 *c = &cpu_data(0);
1384
1385 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1386#else
1387 return true;
1388#endif
1389}
1390
d38ceaf9
AD
1391/* if we get transitioned to only one device, take VGA back */
1392/**
06ec9070 1393 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1394 *
bf44e8ce 1395 * @pdev: PCI device pointer
d38ceaf9
AD
1396 * @state: enable/disable vga decode
1397 *
1398 * Enable/disable vga decode (all asics).
1399 * Returns VGA resource flags.
1400 */
bf44e8ce
CH
1401static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1402 bool state)
d38ceaf9 1403{
bf44e8ce 1404 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
d38ceaf9
AD
1405 amdgpu_asic_set_vga_state(adev, state);
1406 if (state)
1407 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1408 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1409 else
1410 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1411}
1412
e3ecdffa
AD
1413/**
1414 * amdgpu_device_check_block_size - validate the vm block size
1415 *
1416 * @adev: amdgpu_device pointer
1417 *
1418 * Validates the vm block size specified via module parameter.
1419 * The vm block size defines number of bits in page table versus page directory,
1420 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1421 * page table and the remaining bits are in the page directory.
1422 */
06ec9070 1423static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1424{
1425 /* defines number of bits in page table versus page directory,
1426 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1427 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1428 if (amdgpu_vm_block_size == -1)
1429 return;
a1adf8be 1430
bab4fee7 1431 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1432 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1433 amdgpu_vm_block_size);
97489129 1434 amdgpu_vm_block_size = -1;
a1adf8be 1435 }
a1adf8be
CZ
1436}
1437
e3ecdffa
AD
1438/**
1439 * amdgpu_device_check_vm_size - validate the vm size
1440 *
1441 * @adev: amdgpu_device pointer
1442 *
1443 * Validates the vm size in GB specified via module parameter.
1444 * The VM size is the size of the GPU virtual memory space in GB.
1445 */
06ec9070 1446static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1447{
64dab074
AD
1448 /* no need to check the default value */
1449 if (amdgpu_vm_size == -1)
1450 return;
1451
83ca145d
ZJ
1452 if (amdgpu_vm_size < 1) {
1453 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1454 amdgpu_vm_size);
f3368128 1455 amdgpu_vm_size = -1;
83ca145d 1456 }
83ca145d
ZJ
1457}
1458
7951e376
RZ
1459static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1460{
1461 struct sysinfo si;
a9d4fe2f 1462 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1463 uint64_t total_memory;
1464 uint64_t dram_size_seven_GB = 0x1B8000000;
1465 uint64_t dram_size_three_GB = 0xB8000000;
1466
1467 if (amdgpu_smu_memory_pool_size == 0)
1468 return;
1469
1470 if (!is_os_64) {
1471 DRM_WARN("Not 64-bit OS, feature not supported\n");
1472 goto def_value;
1473 }
1474 si_meminfo(&si);
1475 total_memory = (uint64_t)si.totalram * si.mem_unit;
1476
1477 if ((amdgpu_smu_memory_pool_size == 1) ||
1478 (amdgpu_smu_memory_pool_size == 2)) {
1479 if (total_memory < dram_size_three_GB)
1480 goto def_value1;
1481 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1482 (amdgpu_smu_memory_pool_size == 8)) {
1483 if (total_memory < dram_size_seven_GB)
1484 goto def_value1;
1485 } else {
1486 DRM_WARN("Smu memory pool size not supported\n");
1487 goto def_value;
1488 }
1489 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1490
1491 return;
1492
1493def_value1:
1494 DRM_WARN("No enough system memory\n");
1495def_value:
1496 adev->pm.smu_prv_buffer_size = 0;
1497}
1498
9f6a7857
HR
1499static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1500{
1501 if (!(adev->flags & AMD_IS_APU) ||
1502 adev->asic_type < CHIP_RAVEN)
1503 return 0;
1504
1505 switch (adev->asic_type) {
1506 case CHIP_RAVEN:
1507 if (adev->pdev->device == 0x15dd)
1508 adev->apu_flags |= AMD_APU_IS_RAVEN;
1509 if (adev->pdev->device == 0x15d8)
1510 adev->apu_flags |= AMD_APU_IS_PICASSO;
1511 break;
1512 case CHIP_RENOIR:
1513 if ((adev->pdev->device == 0x1636) ||
1514 (adev->pdev->device == 0x164c))
1515 adev->apu_flags |= AMD_APU_IS_RENOIR;
1516 else
1517 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1518 break;
1519 case CHIP_VANGOGH:
1520 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1521 break;
1522 case CHIP_YELLOW_CARP:
1523 break;
d0f56dc2 1524 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1525 if ((adev->pdev->device == 0x13FE) ||
1526 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1527 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1528 break;
9f6a7857 1529 default:
4eaf21b7 1530 break;
9f6a7857
HR
1531 }
1532
1533 return 0;
1534}
1535
d38ceaf9 1536/**
06ec9070 1537 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1538 *
1539 * @adev: amdgpu_device pointer
1540 *
1541 * Validates certain module parameters and updates
1542 * the associated values used by the driver (all asics).
1543 */
912dfc84 1544static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1545{
5b011235
CZ
1546 if (amdgpu_sched_jobs < 4) {
1547 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1548 amdgpu_sched_jobs);
1549 amdgpu_sched_jobs = 4;
47fc644f 1550 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1551 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1552 amdgpu_sched_jobs);
1553 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1554 }
d38ceaf9 1555
83e74db6 1556 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1557 /* gart size must be greater or equal to 32M */
1558 dev_warn(adev->dev, "gart size (%d) too small\n",
1559 amdgpu_gart_size);
83e74db6 1560 amdgpu_gart_size = -1;
d38ceaf9
AD
1561 }
1562
36d38372 1563 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1564 /* gtt size must be greater or equal to 32M */
36d38372
CK
1565 dev_warn(adev->dev, "gtt size (%d) too small\n",
1566 amdgpu_gtt_size);
1567 amdgpu_gtt_size = -1;
d38ceaf9
AD
1568 }
1569
d07f14be
RH
1570 /* valid range is between 4 and 9 inclusive */
1571 if (amdgpu_vm_fragment_size != -1 &&
1572 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1573 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1574 amdgpu_vm_fragment_size = -1;
1575 }
1576
5d5bd5e3
KW
1577 if (amdgpu_sched_hw_submission < 2) {
1578 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1579 amdgpu_sched_hw_submission);
1580 amdgpu_sched_hw_submission = 2;
1581 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1582 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1583 amdgpu_sched_hw_submission);
1584 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1585 }
1586
2656fd23
AG
1587 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1588 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1589 amdgpu_reset_method = -1;
1590 }
1591
7951e376
RZ
1592 amdgpu_device_check_smu_prv_buffer_size(adev);
1593
06ec9070 1594 amdgpu_device_check_vm_size(adev);
d38ceaf9 1595
06ec9070 1596 amdgpu_device_check_block_size(adev);
6a7f76e7 1597
19aede77 1598 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1599
e3c00faa 1600 return 0;
d38ceaf9
AD
1601}
1602
1603/**
1604 * amdgpu_switcheroo_set_state - set switcheroo state
1605 *
1606 * @pdev: pci dev pointer
1694467b 1607 * @state: vga_switcheroo state
d38ceaf9 1608 *
12024b17 1609 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1610 * the asics before or after it is powered up using ACPI methods.
1611 */
8aba21b7
LT
1612static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1613 enum vga_switcheroo_state state)
d38ceaf9
AD
1614{
1615 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1616 int r;
d38ceaf9 1617
b98c6299 1618 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1619 return;
1620
1621 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1622 pr_info("switched on\n");
d38ceaf9
AD
1623 /* don't suspend or resume card normally */
1624 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1625
8f66090b
TZ
1626 pci_set_power_state(pdev, PCI_D0);
1627 amdgpu_device_load_pci_state(pdev);
1628 r = pci_enable_device(pdev);
de185019
AD
1629 if (r)
1630 DRM_WARN("pci_enable_device failed (%d)\n", r);
1631 amdgpu_device_resume(dev, true);
d38ceaf9 1632
d38ceaf9 1633 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1634 } else {
dd4fa6c1 1635 pr_info("switched off\n");
d38ceaf9 1636 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1637 amdgpu_device_suspend(dev, true);
8f66090b 1638 amdgpu_device_cache_pci_state(pdev);
de185019 1639 /* Shut down the device */
8f66090b
TZ
1640 pci_disable_device(pdev);
1641 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1642 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1643 }
1644}
1645
1646/**
1647 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1648 *
1649 * @pdev: pci dev pointer
1650 *
1651 * Callback for the switcheroo driver. Check if the switcheroo
1652 * state can be changed.
1653 * Returns true if the state can be changed, false if not.
1654 */
1655static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1656{
1657 struct drm_device *dev = pci_get_drvdata(pdev);
1658
1659 /*
1660 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1661 * locking inversion with the driver load path. And the access here is
1662 * completely racy anyway. So don't bother with locking for now.
1663 */
7e13ad89 1664 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1665}
1666
1667static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1668 .set_gpu_state = amdgpu_switcheroo_set_state,
1669 .reprobe = NULL,
1670 .can_switch = amdgpu_switcheroo_can_switch,
1671};
1672
e3ecdffa
AD
1673/**
1674 * amdgpu_device_ip_set_clockgating_state - set the CG state
1675 *
87e3f136 1676 * @dev: amdgpu_device pointer
e3ecdffa
AD
1677 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1678 * @state: clockgating state (gate or ungate)
1679 *
1680 * Sets the requested clockgating state for all instances of
1681 * the hardware IP specified.
1682 * Returns the error code from the last instance.
1683 */
43fa561f 1684int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1685 enum amd_ip_block_type block_type,
1686 enum amd_clockgating_state state)
d38ceaf9 1687{
43fa561f 1688 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1689 int i, r = 0;
1690
1691 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1692 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1693 continue;
c722865a
RZ
1694 if (adev->ip_blocks[i].version->type != block_type)
1695 continue;
1696 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1697 continue;
1698 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1699 (void *)adev, state);
1700 if (r)
1701 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1702 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1703 }
1704 return r;
1705}
1706
e3ecdffa
AD
1707/**
1708 * amdgpu_device_ip_set_powergating_state - set the PG state
1709 *
87e3f136 1710 * @dev: amdgpu_device pointer
e3ecdffa
AD
1711 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1712 * @state: powergating state (gate or ungate)
1713 *
1714 * Sets the requested powergating state for all instances of
1715 * the hardware IP specified.
1716 * Returns the error code from the last instance.
1717 */
43fa561f 1718int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1719 enum amd_ip_block_type block_type,
1720 enum amd_powergating_state state)
d38ceaf9 1721{
43fa561f 1722 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1723 int i, r = 0;
1724
1725 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1726 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1727 continue;
c722865a
RZ
1728 if (adev->ip_blocks[i].version->type != block_type)
1729 continue;
1730 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1731 continue;
1732 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1733 (void *)adev, state);
1734 if (r)
1735 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1736 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1737 }
1738 return r;
1739}
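/*
 * Illustrative sketch, not part of the original driver: ungating powergating
 * for all VCN instances would look roughly like this (the warning text is a
 * hypothetical placeholder):
 *
 *	if (amdgpu_device_ip_set_powergating_state(adev,
 *						    AMD_IP_BLOCK_TYPE_VCN,
 *						    AMD_PG_STATE_UNGATE))
 *		dev_warn(adev->dev, "ungating VCN powergating failed\n");
 */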
1740
e3ecdffa
AD
1741/**
1742 * amdgpu_device_ip_get_clockgating_state - get the CG state
1743 *
1744 * @adev: amdgpu_device pointer
1745 * @flags: clockgating feature flags
1746 *
1747 * Walks the list of IPs on the device and updates the clockgating
1748 * flags for each IP.
1749 * Updates @flags with the feature flags for each hardware IP where
1750 * clockgating is enabled.
1751 */
2990a1fc 1752void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1753 u64 *flags)
6cb2d4e4
HR
1754{
1755 int i;
1756
1757 for (i = 0; i < adev->num_ip_blocks; i++) {
1758 if (!adev->ip_blocks[i].status.valid)
1759 continue;
1760 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1761 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1762 }
1763}
1764
e3ecdffa
AD
1765/**
1766 * amdgpu_device_ip_wait_for_idle - wait for idle
1767 *
1768 * @adev: amdgpu_device pointer
1769 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1770 *
1771 * Waits for the requested hardware IP to be idle.
1772 * Returns 0 for success or a negative error code on failure.
1773 */
2990a1fc
AD
1774int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1775 enum amd_ip_block_type block_type)
5dbbb60b
AD
1776{
1777 int i, r;
1778
1779 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1780 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1781 continue;
a1255107
AD
1782 if (adev->ip_blocks[i].version->type == block_type) {
1783 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1784 if (r)
1785 return r;
1786 break;
1787 }
1788 }
1789 return 0;
1790
1791}
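/*
 * Illustrative sketch, not part of the original driver: code that must not
 * touch a block until it has drained could bail out on failure, e.g.:
 *
 *	r = amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
 *	if (r)
 *		return r;
 *
 * A non-zero return means the block never reported idle.
 */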
1792
e3ecdffa
AD
1793/**
1794 * amdgpu_device_ip_is_idle - is the hardware IP idle
1795 *
1796 * @adev: amdgpu_device pointer
1797 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1798 *
1799 * Check if the hardware IP is idle or not.
1800 * Returns true if the IP is idle, false if not.
1801 */
2990a1fc
AD
1802bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1803 enum amd_ip_block_type block_type)
5dbbb60b
AD
1804{
1805 int i;
1806
1807 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1808 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1809 continue;
a1255107
AD
1810 if (adev->ip_blocks[i].version->type == block_type)
1811 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1812 }
1813 return true;
1814
1815}
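/*
 * Illustrative sketch, not part of the original driver: a purely hypothetical
 * caller that skips some work while SDMA is still busy:
 *
 *	if (!amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_SDMA))
 *		return -EBUSY;
 */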
1816
e3ecdffa
AD
1817/**
1818 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1819 *
1820 * @adev: amdgpu_device pointer
87e3f136 1821 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1822 *
1823 * Returns a pointer to the hardware IP block structure
1824 * if it exists for the asic, otherwise NULL.
1825 */
2990a1fc
AD
1826struct amdgpu_ip_block *
1827amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1828 enum amd_ip_block_type type)
d38ceaf9
AD
1829{
1830 int i;
1831
1832 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1833 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1834 return &adev->ip_blocks[i];
1835
1836 return NULL;
1837}
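/*
 * Illustrative sketch, not part of the original driver: looking up the PSP
 * block and printing its version, guarding against the block not existing
 * on this asic:
 *
 *	struct amdgpu_ip_block *psp =
 *		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
 *	if (psp)
 *		DRM_INFO("PSP IP v%d.%d\n", psp->version->major,
 *			 psp->version->minor);
 */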
1838
1839/**
2990a1fc 1840 * amdgpu_device_ip_block_version_cmp
1841 *
1842 * @adev: amdgpu_device pointer
5fc3aeeb 1843 * @type: enum amd_ip_block_type
1844 * @major: major version
1845 * @minor: minor version
1846 *
1847 * Returns 0 if the IP block's version is equal to or greater than the
1848 * requested (major, minor) version, 1 if it is smaller or the ip_block doesn't exist.
1849 */
2990a1fc
AD
1850int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1851 enum amd_ip_block_type type,
1852 u32 major, u32 minor)
d38ceaf9 1853{
2990a1fc 1854 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1855
a1255107
AD
1856 if (ip_block && ((ip_block->version->major > major) ||
1857 ((ip_block->version->major == major) &&
1858 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1859 return 0;
1860
1861 return 1;
1862}
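/*
 * Illustrative sketch, not part of the original driver: since the helper
 * above returns 0 when the installed block is at least the requested
 * version, a feature gate on GFX 8.1+ could read (the flag name is made up):
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *						8, 1))
 *		use_new_gfx_path = true;
 */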
1863
a1255107 1864/**
2990a1fc 1865 * amdgpu_device_ip_block_add
1866 *
1867 * @adev: amdgpu_device pointer
1868 * @ip_block_version: pointer to the IP to add
1869 *
1870 * Adds the IP block driver information to the collection of IPs
1871 * on the asic.
1872 */
2990a1fc
AD
1873int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1874 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1875{
1876 if (!ip_block_version)
1877 return -EINVAL;
1878
7bd939d0
LG
1879 switch (ip_block_version->type) {
1880 case AMD_IP_BLOCK_TYPE_VCN:
1881 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1882 return 0;
1883 break;
1884 case AMD_IP_BLOCK_TYPE_JPEG:
1885 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1886 return 0;
1887 break;
1888 default:
1889 break;
1890 }
1891
e966a725 1892 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1893 ip_block_version->funcs->name);
1894
a1255107
AD
1895 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1896
1897 return 0;
1898}
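/*
 * Illustrative sketch, not part of the original driver: SoC setup code
 * registers its blocks in initialization order, e.g. (block version structs
 * such as these live in the per-ASIC files and are shown here only as an
 * illustration):
 *
 *	r = amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 *	if (r)
 *		return r;
 *	r = amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 *	if (r)
 *		return r;
 */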
1899
e3ecdffa
AD
1900/**
1901 * amdgpu_device_enable_virtual_display - enable virtual display feature
1902 *
1903 * @adev: amdgpu_device pointer
1904 *
1905 * Enables the virtual display feature if the user has enabled it via
1906 * the module parameter virtual_display. This feature provides a virtual
1907 * display hardware on headless boards or in virtualized environments.
1908 * This function parses and validates the configuration string specified by
1909 * the user and configures the virtual display configuration (number of
1910 * virtual connectors, crtcs, etc.) specified.
1911 */
483ef985 1912static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1913{
1914 adev->enable_virtual_display = false;
1915
1916 if (amdgpu_virtual_display) {
8f66090b 1917 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1918 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1919
1920 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1921 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1922 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1923 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1924 if (!strcmp("all", pciaddname)
1925 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1926 long num_crtc;
1927 int res = -1;
1928
9accf2fd 1929 adev->enable_virtual_display = true;
0f66356d
ED
1930
1931 if (pciaddname_tmp)
1932 res = kstrtol(pciaddname_tmp, 10,
1933 &num_crtc);
1934
1935 if (!res) {
1936 if (num_crtc < 1)
1937 num_crtc = 1;
1938 if (num_crtc > 6)
1939 num_crtc = 6;
1940 adev->mode_info.num_crtc = num_crtc;
1941 } else {
1942 adev->mode_info.num_crtc = 1;
1943 }
9accf2fd
ED
1944 break;
1945 }
1946 }
1947
0f66356d
ED
1948 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1949 amdgpu_virtual_display, pci_address_name,
1950 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1951
1952 kfree(pciaddstr);
1953 }
1954}
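/*
 * Illustrative note, not part of the original driver: per the parsing above,
 * the module parameter is a semicolon-separated list of
 * "<pci address>[,<crtc count>]" entries, or "all" to match every device.
 * Possible kernel command line usage (exact syntax is an assumption):
 *
 *	amdgpu.virtual_display=0000:04:00.0,2
 *	amdgpu.virtual_display=all
 */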
1955
25263da3
AD
1956void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1957{
1958 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1959 adev->mode_info.num_crtc = 1;
1960 adev->enable_virtual_display = true;
1961 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1962 adev->enable_virtual_display, adev->mode_info.num_crtc);
1963 }
1964}
1965
e3ecdffa
AD
1966/**
1967 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1968 *
1969 * @adev: amdgpu_device pointer
1970 *
1971 * Parses the asic configuration parameters specified in the gpu info
1972 * firmware and makes them available to the driver for use in configuring
1973 * the asic.
1974 * Returns 0 on success, -EINVAL on failure.
1975 */
e2a75f88
AD
1976static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1977{
e2a75f88 1978 const char *chip_name;
c0a43457 1979 char fw_name[40];
e2a75f88
AD
1980 int err;
1981 const struct gpu_info_firmware_header_v1_0 *hdr;
1982
ab4fe3e1
HR
1983 adev->firmware.gpu_info_fw = NULL;
1984
72de33f8 1985 if (adev->mman.discovery_bin) {
cc375d8c
TY
1986 /*
1987 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 1988 * temporarily read it from gpu_info firmware. Should be dropped
1989 * when DAL no longer needs it.
1990 */
1991 if (adev->asic_type != CHIP_NAVI12)
1992 return 0;
258620d0
AD
1993 }
1994
e2a75f88 1995 switch (adev->asic_type) {
e2a75f88
AD
1996 default:
1997 return 0;
1998 case CHIP_VEGA10:
1999 chip_name = "vega10";
2000 break;
3f76dced
AD
2001 case CHIP_VEGA12:
2002 chip_name = "vega12";
2003 break;
2d2e5e7e 2004 case CHIP_RAVEN:
54f78a76 2005 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2006 chip_name = "raven2";
54f78a76 2007 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2008 chip_name = "picasso";
54c4d17e
FX
2009 else
2010 chip_name = "raven";
2d2e5e7e 2011 break;
65e60f6e
LM
2012 case CHIP_ARCTURUS:
2013 chip_name = "arcturus";
2014 break;
42b325e5
XY
2015 case CHIP_NAVI12:
2016 chip_name = "navi12";
2017 break;
e2a75f88
AD
2018 }
2019
2020 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2021 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2022 if (err) {
2023 dev_err(adev->dev,
b31d3063 2024 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2025 fw_name);
2026 goto out;
2027 }
2028
ab4fe3e1 2029 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2030 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2031
2032 switch (hdr->version_major) {
2033 case 1:
2034 {
2035 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2036 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2037 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2038
cc375d8c
TY
2039 /*
2040 * Should be dropped when DAL no longer needs it.
2041 */
2042 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2043 goto parse_soc_bounding_box;
2044
b5ab16bf
AD
2045 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2046 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2047 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2048 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2049 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2050 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2051 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2052 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2053 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2054 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2055 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2056 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2057 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2058 adev->gfx.cu_info.max_waves_per_simd =
2059 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2060 adev->gfx.cu_info.max_scratch_slots_per_cu =
2061 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2062 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2063 if (hdr->version_minor >= 1) {
35c2e910
HZ
2064 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2065 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2066 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2067 adev->gfx.config.num_sc_per_sh =
2068 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2069 adev->gfx.config.num_packer_per_sc =
2070 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2071 }
ec51d3fa
XY
2072
2073parse_soc_bounding_box:
2074 /*
2075 * soc bounding box info is not integrated in the discovery table,
2076 * so we always need to parse it from the gpu_info firmware when needed.
2077 */
48321c3d
HW
2078 if (hdr->version_minor == 2) {
2079 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2080 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2081 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2082 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2083 }
e2a75f88
AD
2084 break;
2085 }
2086 default:
2087 dev_err(adev->dev,
2088 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2089 err = -EINVAL;
2090 goto out;
2091 }
2092out:
e2a75f88
AD
2093 return err;
2094}
2095
e3ecdffa
AD
2096/**
2097 * amdgpu_device_ip_early_init - run early init for hardware IPs
2098 *
2099 * @adev: amdgpu_device pointer
2100 *
2101 * Early initialization pass for hardware IPs. The hardware IPs that make
2102 * up each asic are discovered and each IP's early_init callback is run. This
2103 * is the first stage in initializing the asic.
2104 * Returns 0 on success, negative error code on failure.
2105 */
06ec9070 2106static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2107{
901e2be2
AD
2108 struct drm_device *dev = adev_to_drm(adev);
2109 struct pci_dev *parent;
aaa36a97 2110 int i, r;
ced69502 2111 bool total;
d38ceaf9 2112
483ef985 2113 amdgpu_device_enable_virtual_display(adev);
a6be7570 2114
00a979f3 2115 if (amdgpu_sriov_vf(adev)) {
00a979f3 2116 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2117 if (r)
2118 return r;
00a979f3
WS
2119 }
2120
d38ceaf9 2121 switch (adev->asic_type) {
33f34802
KW
2122#ifdef CONFIG_DRM_AMDGPU_SI
2123 case CHIP_VERDE:
2124 case CHIP_TAHITI:
2125 case CHIP_PITCAIRN:
2126 case CHIP_OLAND:
2127 case CHIP_HAINAN:
295d0daf 2128 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2129 r = si_set_ip_blocks(adev);
2130 if (r)
2131 return r;
2132 break;
2133#endif
a2e73f56
AD
2134#ifdef CONFIG_DRM_AMDGPU_CIK
2135 case CHIP_BONAIRE:
2136 case CHIP_HAWAII:
2137 case CHIP_KAVERI:
2138 case CHIP_KABINI:
2139 case CHIP_MULLINS:
e1ad2d53 2140 if (adev->flags & AMD_IS_APU)
a2e73f56 2141 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2142 else
2143 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2144
2145 r = cik_set_ip_blocks(adev);
2146 if (r)
2147 return r;
2148 break;
2149#endif
da87c30b
AD
2150 case CHIP_TOPAZ:
2151 case CHIP_TONGA:
2152 case CHIP_FIJI:
2153 case CHIP_POLARIS10:
2154 case CHIP_POLARIS11:
2155 case CHIP_POLARIS12:
2156 case CHIP_VEGAM:
2157 case CHIP_CARRIZO:
2158 case CHIP_STONEY:
2159 if (adev->flags & AMD_IS_APU)
2160 adev->family = AMDGPU_FAMILY_CZ;
2161 else
2162 adev->family = AMDGPU_FAMILY_VI;
2163
2164 r = vi_set_ip_blocks(adev);
2165 if (r)
2166 return r;
2167 break;
d38ceaf9 2168 default:
63352b7f
AD
2169 r = amdgpu_discovery_set_ip_blocks(adev);
2170 if (r)
2171 return r;
2172 break;
d38ceaf9
AD
2173 }
2174
901e2be2
AD
2175 if (amdgpu_has_atpx() &&
2176 (amdgpu_is_atpx_hybrid() ||
2177 amdgpu_has_atpx_dgpu_power_cntl()) &&
2178 ((adev->flags & AMD_IS_APU) == 0) &&
2179 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2180 adev->flags |= AMD_IS_PX;
2181
85ac2021
AD
2182 if (!(adev->flags & AMD_IS_APU)) {
2183 parent = pci_upstream_bridge(adev->pdev);
2184 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2185 }
901e2be2 2186
1884734a 2187
3b94fb10 2188 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2189 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2190 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2191 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2192 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2193
ced69502 2194 total = true;
d38ceaf9
AD
2195 for (i = 0; i < adev->num_ip_blocks; i++) {
2196 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2197 DRM_ERROR("disabled ip block: %d <%s>\n",
2198 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2199 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2200 } else {
a1255107
AD
2201 if (adev->ip_blocks[i].version->funcs->early_init) {
2202 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2203 if (r == -ENOENT) {
a1255107 2204 adev->ip_blocks[i].status.valid = false;
2c1a2784 2205 } else if (r) {
a1255107
AD
2206 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2207 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2208 total = false;
2c1a2784 2209 } else {
a1255107 2210 adev->ip_blocks[i].status.valid = true;
2c1a2784 2211 }
974e6b64 2212 } else {
a1255107 2213 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2214 }
d38ceaf9 2215 }
21a249ca
AD
2216 /* get the vbios after the asic_funcs are set up */
2217 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2218 r = amdgpu_device_parse_gpu_info_fw(adev);
2219 if (r)
2220 return r;
2221
21a249ca
AD
2222 /* Read BIOS */
2223 if (!amdgpu_get_bios(adev))
2224 return -EINVAL;
2225
2226 r = amdgpu_atombios_init(adev);
2227 if (r) {
2228 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2229 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2230 return r;
2231 }
77eabc6f
PJZ
2232
2233 /* get pf2vf msg info at its earliest time */
2234 if (amdgpu_sriov_vf(adev))
2235 amdgpu_virt_init_data_exchange(adev);
2236
21a249ca 2237 }
d38ceaf9 2238 }
ced69502
ML
2239 if (!total)
2240 return -ENODEV;
d38ceaf9 2241
00fa4035 2242 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2243 adev->cg_flags &= amdgpu_cg_mask;
2244 adev->pg_flags &= amdgpu_pg_mask;
2245
d38ceaf9
AD
2246 return 0;
2247}
2248
0a4f2520
RZ
2249static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2250{
2251 int i, r;
2252
2253 for (i = 0; i < adev->num_ip_blocks; i++) {
2254 if (!adev->ip_blocks[i].status.sw)
2255 continue;
2256 if (adev->ip_blocks[i].status.hw)
2257 continue;
2258 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2259 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2260 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2261 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2262 if (r) {
2263 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2264 adev->ip_blocks[i].version->funcs->name, r);
2265 return r;
2266 }
2267 adev->ip_blocks[i].status.hw = true;
2268 }
2269 }
2270
2271 return 0;
2272}
2273
2274static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2275{
2276 int i, r;
2277
2278 for (i = 0; i < adev->num_ip_blocks; i++) {
2279 if (!adev->ip_blocks[i].status.sw)
2280 continue;
2281 if (adev->ip_blocks[i].status.hw)
2282 continue;
2283 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2284 if (r) {
2285 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2286 adev->ip_blocks[i].version->funcs->name, r);
2287 return r;
2288 }
2289 adev->ip_blocks[i].status.hw = true;
2290 }
2291
2292 return 0;
2293}
2294
7a3e0bb2
RZ
2295static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2296{
2297 int r = 0;
2298 int i;
80f41f84 2299 uint32_t smu_version;
7a3e0bb2
RZ
2300
2301 if (adev->asic_type >= CHIP_VEGA10) {
2302 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2303 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2304 continue;
2305
e3c1b071 2306 if (!adev->ip_blocks[i].status.sw)
2307 continue;
2308
482f0e53
ML
2309 /* no need to do the fw loading again if already done*/
2310 if (adev->ip_blocks[i].status.hw == true)
2311 break;
2312
53b3f8f4 2313 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2314 r = adev->ip_blocks[i].version->funcs->resume(adev);
2315 if (r) {
2316 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2317 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2318 return r;
2319 }
2320 } else {
2321 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2322 if (r) {
2323 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2324 adev->ip_blocks[i].version->funcs->name, r);
2325 return r;
7a3e0bb2 2326 }
7a3e0bb2 2327 }
482f0e53
ML
2328
2329 adev->ip_blocks[i].status.hw = true;
2330 break;
7a3e0bb2
RZ
2331 }
2332 }
482f0e53 2333
8973d9ec
ED
2334 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2335 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2336
80f41f84 2337 return r;
7a3e0bb2
RZ
2338}
2339
5fd8518d
AG
2340static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2341{
2342 long timeout;
2343 int r, i;
2344
2345 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2346 struct amdgpu_ring *ring = adev->rings[i];
2347
2348 /* No need to setup the GPU scheduler for rings that don't need it */
2349 if (!ring || ring->no_scheduler)
2350 continue;
2351
2352 switch (ring->funcs->type) {
2353 case AMDGPU_RING_TYPE_GFX:
2354 timeout = adev->gfx_timeout;
2355 break;
2356 case AMDGPU_RING_TYPE_COMPUTE:
2357 timeout = adev->compute_timeout;
2358 break;
2359 case AMDGPU_RING_TYPE_SDMA:
2360 timeout = adev->sdma_timeout;
2361 break;
2362 default:
2363 timeout = adev->video_timeout;
2364 break;
2365 }
2366
2367 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
11f25c84 2368 ring->num_hw_submission, 0,
8ab62eda
JG
2369 timeout, adev->reset_domain->wq,
2370 ring->sched_score, ring->name,
2371 adev->dev);
5fd8518d
AG
2372 if (r) {
2373 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2374 ring->name);
2375 return r;
2376 }
2377 }
2378
2379 return 0;
2380}
2381
2382
e3ecdffa
AD
2383/**
2384 * amdgpu_device_ip_init - run init for hardware IPs
2385 *
2386 * @adev: amdgpu_device pointer
2387 *
2388 * Main initialization pass for hardware IPs. The list of all the hardware
2389 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2390 * are run. sw_init initializes the software state associated with each IP
2391 * and hw_init initializes the hardware associated with each IP.
2392 * Returns 0 on success, negative error code on failure.
2393 */
06ec9070 2394static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2395{
2396 int i, r;
2397
c030f2e4 2398 r = amdgpu_ras_init(adev);
2399 if (r)
2400 return r;
2401
d38ceaf9 2402 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2403 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2404 continue;
a1255107 2405 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2406 if (r) {
a1255107
AD
2407 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2408 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2409 goto init_failed;
2c1a2784 2410 }
a1255107 2411 adev->ip_blocks[i].status.sw = true;
bfca0289 2412
c1c39032
AD
2413 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2414 /* need to do common hw init early so everything is set up for gmc */
2415 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2416 if (r) {
2417 DRM_ERROR("hw_init %d failed %d\n", i, r);
2418 goto init_failed;
2419 }
2420 adev->ip_blocks[i].status.hw = true;
2421 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2422 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2423 /* Try to reserve bad pages early */
2424 if (amdgpu_sriov_vf(adev))
2425 amdgpu_virt_exchange_data(adev);
2426
7ccfd79f 2427 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2428 if (r) {
7ccfd79f 2429 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2430 goto init_failed;
2c1a2784 2431 }
a1255107 2432 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2433 if (r) {
2434 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2435 goto init_failed;
2c1a2784 2436 }
06ec9070 2437 r = amdgpu_device_wb_init(adev);
2c1a2784 2438 if (r) {
06ec9070 2439 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2440 goto init_failed;
2c1a2784 2441 }
a1255107 2442 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2443
2444 /* right after GMC hw init, we create CSA */
8a1fbb4a 2445 if (amdgpu_mcbp) {
1e256e27 2446 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2447 AMDGPU_GEM_DOMAIN_VRAM |
2448 AMDGPU_GEM_DOMAIN_GTT,
2449 AMDGPU_CSA_SIZE);
2493664f
ML
2450 if (r) {
2451 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2452 goto init_failed;
2493664f
ML
2453 }
2454 }
d38ceaf9
AD
2455 }
2456 }
2457
c9ffa427 2458 if (amdgpu_sriov_vf(adev))
22c16d25 2459 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2460
533aed27
AG
2461 r = amdgpu_ib_pool_init(adev);
2462 if (r) {
2463 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2464 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2465 goto init_failed;
2466 }
2467
c8963ea4
RZ
2468 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2469 if (r)
72d3f592 2470 goto init_failed;
0a4f2520
RZ
2471
2472 r = amdgpu_device_ip_hw_init_phase1(adev);
2473 if (r)
72d3f592 2474 goto init_failed;
0a4f2520 2475
7a3e0bb2
RZ
2476 r = amdgpu_device_fw_loading(adev);
2477 if (r)
72d3f592 2478 goto init_failed;
7a3e0bb2 2479
0a4f2520
RZ
2480 r = amdgpu_device_ip_hw_init_phase2(adev);
2481 if (r)
72d3f592 2482 goto init_failed;
d38ceaf9 2483
121a2bc6
AG
2484 /*
2485 * retired pages will be loaded from eeprom and reserved here,
2486 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2487 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2488 * for I2C communication, which is only true at this point.
2489 *
2490 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2491 * failure from bad gpu situation and stop amdgpu init process
2492 * accordingly. For other failed cases, it will still release all
2493 * the resource and print error message, rather than returning one
2494 * negative value to upper level.
121a2bc6
AG
2495 *
2496 * Note: theoretically, this should be called before all vram allocations
2497 * to protect retired pages from being misused.
2498 */
b82e65a9
GC
2499 r = amdgpu_ras_recovery_init(adev);
2500 if (r)
2501 goto init_failed;
121a2bc6 2502
cfbb6b00
AG
2503 /**
2504 * In case of XGMI grab extra reference for reset domain for this device
2505 */
a4c63caf 2506 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2507 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2508 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2509 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2510
dfd0287b
LH
2511 if (WARN_ON(!hive)) {
2512 r = -ENOENT;
2513 goto init_failed;
2514 }
2515
46c67660 2516 if (!hive->reset_domain ||
2517 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2518 r = -ENOENT;
2519 amdgpu_put_xgmi_hive(hive);
2520 goto init_failed;
2521 }
2522
2523 /* Drop the early temporary reset domain we created for device */
2524 amdgpu_reset_put_reset_domain(adev->reset_domain);
2525 adev->reset_domain = hive->reset_domain;
9dfa4860 2526 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2527 }
a4c63caf
AG
2528 }
2529 }
2530
5fd8518d
AG
2531 r = amdgpu_device_init_schedulers(adev);
2532 if (r)
2533 goto init_failed;
e3c1b071 2534
2535 /* Don't init kfd if whole hive need to be reset during init */
c004d44e 2536 if (!adev->gmc.xgmi.pending_reset)
e3c1b071 2537 amdgpu_amdkfd_device_init(adev);
c6332b97 2538
bd607166
KR
2539 amdgpu_fru_get_product_info(adev);
2540
72d3f592 2541init_failed:
c6332b97 2542
72d3f592 2543 return r;
d38ceaf9
AD
2544}
2545
e3ecdffa
AD
2546/**
2547 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2548 *
2549 * @adev: amdgpu_device pointer
2550 *
2551 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2552 * this function before a GPU reset. If the value is retained after a
2553 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2554 */
06ec9070 2555static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2556{
2557 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2558}
2559
e3ecdffa
AD
2560/**
2561 * amdgpu_device_check_vram_lost - check if vram is valid
2562 *
2563 * @adev: amdgpu_device pointer
2564 *
2565 * Checks the reset magic value written to the gart pointer in VRAM.
2566 * The driver calls this after a GPU reset to see if the contents of
2567 * VRAM are lost or not.
2568 * Returns true if vram is lost, false if not.
2569 */
06ec9070 2570static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2571{
dadce777
EQ
2572 if (memcmp(adev->gart.ptr, adev->reset_magic,
2573 AMDGPU_RESET_MAGIC_NUM))
2574 return true;
2575
53b3f8f4 2576 if (!amdgpu_in_reset(adev))
dadce777
EQ
2577 return false;
2578
2579 /*
2580 * For all ASICs with baco/mode1 reset, the VRAM is
2581 * always assumed to be lost.
2582 */
2583 switch (amdgpu_asic_reset_method(adev)) {
2584 case AMD_RESET_METHOD_BACO:
2585 case AMD_RESET_METHOD_MODE1:
2586 return true;
2587 default:
2588 return false;
2589 }
0c49e0b8
CZ
2590}
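/*
 * Illustrative sketch, not part of the original driver: the two helpers above
 * are intended to be used as a pair around an ASIC reset, roughly:
 *
 *	amdgpu_device_fill_reset_magic(adev);
 *	... perform the reset ...
 *	vram_lost = amdgpu_device_check_vram_lost(adev);
 *
 * When vram_lost is true the caller has to restore VRAM contents.
 */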
2591
2592/**
2593 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2594 *
2595 * @adev: amdgpu_device pointer
2596 * @state: clockgating state (gate or ungate)
2597 *
2598 * The list of all the hardware IPs that make up the asic is walked and the
2599 * set_clockgating_state callbacks are run.
2600 * The late initialization pass enables clockgating for the hardware IPs;
2601 * the fini or suspend pass disables it.
2602 * Returns 0 on success, negative error code on failure.
2603 */
fdd34271 2604
5d89bb2d
LL
2605int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2606 enum amd_clockgating_state state)
d38ceaf9 2607{
1112a46b 2608 int i, j, r;
d38ceaf9 2609
4a2ba394
SL
2610 if (amdgpu_emu_mode == 1)
2611 return 0;
2612
1112a46b
RZ
2613 for (j = 0; j < adev->num_ip_blocks; j++) {
2614 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2615 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2616 continue;
47198eb7 2617 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2618 if (adev->in_s0ix &&
47198eb7
AD
2619 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2620 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2621 continue;
4a446d55 2622 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2623 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2624 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2625 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2626 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2627 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2628 /* enable clockgating to save power */
a1255107 2629 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2630 state);
4a446d55
AD
2631 if (r) {
2632 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2633 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2634 return r;
2635 }
b0b00ff1 2636 }
d38ceaf9 2637 }
06b18f61 2638
c9f96fd5
RZ
2639 return 0;
2640}
2641
5d89bb2d
LL
2642int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2643 enum amd_powergating_state state)
c9f96fd5 2644{
1112a46b 2645 int i, j, r;
06b18f61 2646
c9f96fd5
RZ
2647 if (amdgpu_emu_mode == 1)
2648 return 0;
2649
1112a46b
RZ
2650 for (j = 0; j < adev->num_ip_blocks; j++) {
2651 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2652 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2653 continue;
47198eb7 2654 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2655 if (adev->in_s0ix &&
47198eb7
AD
2656 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2657 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2658 continue;
c9f96fd5
RZ
2659 /* skip PG for VCE/UVD, it's handled specially */
2660 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2661 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2662 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2663 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2664 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2665 /* enable powergating to save power */
2666 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2667 state);
c9f96fd5
RZ
2668 if (r) {
2669 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2670 adev->ip_blocks[i].version->funcs->name, r);
2671 return r;
2672 }
2673 }
2674 }
2dc80b00
S
2675 return 0;
2676}
2677
beff74bc
AD
2678static int amdgpu_device_enable_mgpu_fan_boost(void)
2679{
2680 struct amdgpu_gpu_instance *gpu_ins;
2681 struct amdgpu_device *adev;
2682 int i, ret = 0;
2683
2684 mutex_lock(&mgpu_info.mutex);
2685
2686 /*
2687 * MGPU fan boost feature should be enabled
2688 * only when there are two or more dGPUs in
2689 * the system
2690 */
2691 if (mgpu_info.num_dgpu < 2)
2692 goto out;
2693
2694 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2695 gpu_ins = &(mgpu_info.gpu_ins[i]);
2696 adev = gpu_ins->adev;
2697 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2698 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2699 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2700 if (ret)
2701 break;
2702
2703 gpu_ins->mgpu_fan_enabled = 1;
2704 }
2705 }
2706
2707out:
2708 mutex_unlock(&mgpu_info.mutex);
2709
2710 return ret;
2711}
2712
e3ecdffa
AD
2713/**
2714 * amdgpu_device_ip_late_init - run late init for hardware IPs
2715 *
2716 * @adev: amdgpu_device pointer
2717 *
2718 * Late initialization pass for hardware IPs. The list of all the hardware
2719 * IPs that make up the asic is walked and the late_init callbacks are run.
2720 * late_init covers any special initialization that an IP requires
2721 * after all of them have been initialized or something that needs to happen
2722 * late in the init process.
2723 * Returns 0 on success, negative error code on failure.
2724 */
06ec9070 2725static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2726{
60599a03 2727 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2728 int i = 0, r;
2729
2730 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2731 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2732 continue;
2733 if (adev->ip_blocks[i].version->funcs->late_init) {
2734 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2735 if (r) {
2736 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2737 adev->ip_blocks[i].version->funcs->name, r);
2738 return r;
2739 }
2dc80b00 2740 }
73f847db 2741 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2742 }
2743
867e24ca 2744 r = amdgpu_ras_late_init(adev);
2745 if (r) {
2746 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2747 return r;
2748 }
2749
a891d239
DL
2750 amdgpu_ras_set_error_query_ready(adev, true);
2751
1112a46b
RZ
2752 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2753 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2754
06ec9070 2755 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2756
beff74bc
AD
2757 r = amdgpu_device_enable_mgpu_fan_boost();
2758 if (r)
2759 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2760
2761 /* For passthrough configuration on arcturus and aldebaran, enable special SBR handling */
47fc644f
SS
2762 if (amdgpu_passthrough(adev) &&
2763 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2764 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2765 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2766
2767 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2768 mutex_lock(&mgpu_info.mutex);
2769
2770 /*
2771 * Reset device p-state to low as this was booted with high.
2772 *
2773 * This should be performed only after all devices from the same
2774 * hive get initialized.
2775 *
2776 * However, the number of devices in the hive is not known in advance,
2777 * as it is counted one by one during device initialization.
2778 *
2779 * So, we wait for all XGMI interlinked devices initialized.
2780 * This may bring some delays as those devices may come from
2781 * different hives. But that should be OK.
2782 */
2783 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2784 for (i = 0; i < mgpu_info.num_gpu; i++) {
2785 gpu_instance = &(mgpu_info.gpu_ins[i]);
2786 if (gpu_instance->adev->flags & AMD_IS_APU)
2787 continue;
2788
d84a430d
JK
2789 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2790 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2791 if (r) {
2792 DRM_ERROR("pstate setting failed (%d).\n", r);
2793 break;
2794 }
2795 }
2796 }
2797
2798 mutex_unlock(&mgpu_info.mutex);
2799 }
2800
d38ceaf9
AD
2801 return 0;
2802}
2803
613aa3ea
LY
2804/**
2805 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2806 *
2807 * @adev: amdgpu_device pointer
2808 *
2810 * For ASICs that need to disable SMC first
2810 */
2811static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2812{
2813 int i, r;
2814
2815 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2816 return;
2817
2818 for (i = 0; i < adev->num_ip_blocks; i++) {
2819 if (!adev->ip_blocks[i].status.hw)
2820 continue;
2821 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2822 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2823 /* XXX handle errors */
2824 if (r) {
2825 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2826 adev->ip_blocks[i].version->funcs->name, r);
2827 }
2828 adev->ip_blocks[i].status.hw = false;
2829 break;
2830 }
2831 }
2832}
2833
e9669fb7 2834static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2835{
2836 int i, r;
2837
e9669fb7
AG
2838 for (i = 0; i < adev->num_ip_blocks; i++) {
2839 if (!adev->ip_blocks[i].version->funcs->early_fini)
2840 continue;
5278a159 2841
e9669fb7
AG
2842 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2843 if (r) {
2844 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2845 adev->ip_blocks[i].version->funcs->name, r);
2846 }
2847 }
c030f2e4 2848
05df1f01 2849 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2850 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2851
7270e895
TY
2852 amdgpu_amdkfd_suspend(adev, false);
2853
613aa3ea
LY
2854 /* Workaround for ASICs that need to disable SMC first */
2855 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2856
d38ceaf9 2857 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2858 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2859 continue;
8201a67a 2860
a1255107 2861 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2862 /* XXX handle errors */
2c1a2784 2863 if (r) {
a1255107
AD
2864 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2865 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2866 }
8201a67a 2867
a1255107 2868 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2869 }
2870
6effad8a
GC
2871 if (amdgpu_sriov_vf(adev)) {
2872 if (amdgpu_virt_release_full_gpu(adev, false))
2873 DRM_ERROR("failed to release exclusive mode on fini\n");
2874 }
2875
e9669fb7
AG
2876 return 0;
2877}
2878
2879/**
2880 * amdgpu_device_ip_fini - run fini for hardware IPs
2881 *
2882 * @adev: amdgpu_device pointer
2883 *
2884 * Main teardown pass for hardware IPs. The list of all the hardware
2885 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2886 * are run. hw_fini tears down the hardware associated with each IP
2887 * and sw_fini tears down any software state associated with each IP.
2888 * Returns 0 on success, negative error code on failure.
2889 */
2890static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2891{
2892 int i, r;
2893
2894 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2895 amdgpu_virt_release_ras_err_handler_data(adev);
2896
e9669fb7
AG
2897 if (adev->gmc.xgmi.num_physical_nodes > 1)
2898 amdgpu_xgmi_remove_device(adev);
2899
c004d44e 2900 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2901
d38ceaf9 2902 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2903 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2904 continue;
c12aba3a
ML
2905
2906 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2907 amdgpu_ucode_free_bo(adev);
1e256e27 2908 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 2909 amdgpu_device_wb_fini(adev);
7ccfd79f 2910 amdgpu_device_mem_scratch_fini(adev);
533aed27 2911 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2912 }
2913
a1255107 2914 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2915 /* XXX handle errors */
2c1a2784 2916 if (r) {
a1255107
AD
2917 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2918 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2919 }
a1255107
AD
2920 adev->ip_blocks[i].status.sw = false;
2921 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2922 }
2923
a6dcfd9c 2924 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2925 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2926 continue;
a1255107
AD
2927 if (adev->ip_blocks[i].version->funcs->late_fini)
2928 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2929 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2930 }
2931
c030f2e4 2932 amdgpu_ras_fini(adev);
2933
d38ceaf9
AD
2934 return 0;
2935}
2936
e3ecdffa 2937/**
beff74bc 2938 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2939 *
1112a46b 2940 * @work: work_struct.
e3ecdffa 2941 */
beff74bc 2942static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2943{
2944 struct amdgpu_device *adev =
beff74bc 2945 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2946 int r;
2947
2948 r = amdgpu_ib_ring_tests(adev);
2949 if (r)
2950 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2951}
2952
1e317b99
RZ
2953static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2954{
2955 struct amdgpu_device *adev =
2956 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2957
90a92662
MD
2958 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2959 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2960
2961 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2962 adev->gfx.gfx_off_state = true;
1e317b99
RZ
2963}
2964
e3ecdffa 2965/**
e7854a03 2966 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2967 *
2968 * @adev: amdgpu_device pointer
2969 *
2970 * Main suspend function for hardware IPs. The list of all the hardware
2971 * IPs that make up the asic is walked, clockgating is disabled and the
2972 * suspend callbacks are run. suspend puts the hardware and software state
2973 * in each IP into a state suitable for suspend.
2974 * Returns 0 on success, negative error code on failure.
2975 */
e7854a03
AD
2976static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2977{
2978 int i, r;
2979
50ec83f0
AD
2980 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2981 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2982
b31d6ada
EQ
2983 /*
2984 * Per PMFW team's suggestion, driver needs to handle gfxoff
2985 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2986 * scenario. Add the missing df cstate disablement here.
2987 */
2988 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2989 dev_warn(adev->dev, "Failed to disallow df cstate");
2990
e7854a03
AD
2991 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2992 if (!adev->ip_blocks[i].status.valid)
2993 continue;
2b9f7848 2994
e7854a03 2995 /* displays are handled separately */
2b9f7848
ND
2996 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2997 continue;
2998
2999 /* XXX handle errors */
3000 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3001 /* XXX handle errors */
3002 if (r) {
3003 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3004 adev->ip_blocks[i].version->funcs->name, r);
3005 return r;
e7854a03 3006 }
2b9f7848
ND
3007
3008 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3009 }
3010
e7854a03
AD
3011 return 0;
3012}
3013
3014/**
3015 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3016 *
3017 * @adev: amdgpu_device pointer
3018 *
3019 * Main suspend function for hardware IPs. The list of all the hardware
3020 * IPs that make up the asic is walked, clockgating is disabled and the
3021 * suspend callbacks are run. suspend puts the hardware and software state
3022 * in each IP into a state suitable for suspend.
3023 * Returns 0 on success, negative error code on failure.
3024 */
3025static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3026{
3027 int i, r;
3028
557f42a2 3029 if (adev->in_s0ix)
bc143d8b 3030 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3031
d38ceaf9 3032 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3033 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3034 continue;
e7854a03
AD
3035 /* displays are handled in phase1 */
3036 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3037 continue;
bff77e86
LM
3038 /* PSP lost connection when err_event_athub occurs */
3039 if (amdgpu_ras_intr_triggered() &&
3040 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3041 adev->ip_blocks[i].status.hw = false;
3042 continue;
3043 }
e3c1b071 3044
3045 /* skip unnecessary suspend if we do not initialize them yet */
3046 if (adev->gmc.xgmi.pending_reset &&
3047 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3048 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3049 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3050 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3051 adev->ip_blocks[i].status.hw = false;
3052 continue;
3053 }
557f42a2 3054
afa6646b 3055 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3056 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3057 * like at runtime. PSP is also part of the always on hardware
3058 * so no need to suspend it.
3059 */
557f42a2 3060 if (adev->in_s0ix &&
32ff160d 3061 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3062 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3063 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3064 continue;
3065
2a7798ea
AD
3066 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3067 if (adev->in_s0ix &&
3068 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3069 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3070 continue;
3071
e11c7750
TH
3072 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3073 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3074 * from this location and RLC Autoload automatically also gets loaded
3075 * from here based on PMFW -> PSP message during re-init sequence.
3076 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3077 * the TMR and reload FWs again for IMU enabled APU ASICs.
3078 */
3079 if (amdgpu_in_reset(adev) &&
3080 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3081 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3082 continue;
3083
d38ceaf9 3084 /* XXX handle errors */
a1255107 3085 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3086 /* XXX handle errors */
2c1a2784 3087 if (r) {
a1255107
AD
3088 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3089 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3090 }
876923fb 3091 adev->ip_blocks[i].status.hw = false;
a3a09142 3092 /* handle putting the SMC in the appropriate state */
47fc644f 3093 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3094 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3095 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3096 if (r) {
3097 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3098 adev->mp1_state, r);
3099 return r;
3100 }
a3a09142
AD
3101 }
3102 }
d38ceaf9
AD
3103 }
3104
3105 return 0;
3106}
3107
e7854a03
AD
3108/**
3109 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3110 *
3111 * @adev: amdgpu_device pointer
3112 *
3113 * Main suspend function for hardware IPs. The list of all the hardware
3114 * IPs that make up the asic is walked, clockgating is disabled and the
3115 * suspend callbacks are run. suspend puts the hardware and software state
3116 * in each IP into a state suitable for suspend.
3117 * Returns 0 on success, negative error code on failure.
3118 */
3119int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3120{
3121 int r;
3122
3c73683c
JC
3123 if (amdgpu_sriov_vf(adev)) {
3124 amdgpu_virt_fini_data_exchange(adev);
e7819644 3125 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3126 }
e7819644 3127
e7854a03
AD
3128 r = amdgpu_device_ip_suspend_phase1(adev);
3129 if (r)
3130 return r;
3131 r = amdgpu_device_ip_suspend_phase2(adev);
3132
e7819644
YT
3133 if (amdgpu_sriov_vf(adev))
3134 amdgpu_virt_release_full_gpu(adev, false);
3135
e7854a03
AD
3136 return r;
3137}
3138
06ec9070 3139static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3140{
3141 int i, r;
3142
2cb681b6 3143 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3144 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3145 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3146 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3147 AMD_IP_BLOCK_TYPE_IH,
3148 };
a90ad3c2 3149
95ea3dbc 3150 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3151 int j;
3152 struct amdgpu_ip_block *block;
a90ad3c2 3153
4cd2a96d
J
3154 block = &adev->ip_blocks[i];
3155 block->status.hw = false;
2cb681b6 3156
4cd2a96d 3157 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3158
4cd2a96d 3159 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3160 !block->status.valid)
3161 continue;
3162
3163 r = block->version->funcs->hw_init(adev);
0aaeefcc 3164 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3165 if (r)
3166 return r;
482f0e53 3167 block->status.hw = true;
a90ad3c2
ML
3168 }
3169 }
3170
3171 return 0;
3172}
3173
06ec9070 3174static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3175{
3176 int i, r;
3177
2cb681b6
ML
3178 static enum amd_ip_block_type ip_order[] = {
3179 AMD_IP_BLOCK_TYPE_SMC,
3180 AMD_IP_BLOCK_TYPE_DCE,
3181 AMD_IP_BLOCK_TYPE_GFX,
3182 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3183 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3184 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3185 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3186 AMD_IP_BLOCK_TYPE_VCN,
3187 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3188 };
a90ad3c2 3189
2cb681b6
ML
3190 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3191 int j;
3192 struct amdgpu_ip_block *block;
a90ad3c2 3193
2cb681b6
ML
3194 for (j = 0; j < adev->num_ip_blocks; j++) {
3195 block = &adev->ip_blocks[j];
3196
3197 if (block->version->type != ip_order[i] ||
482f0e53
ML
3198 !block->status.valid ||
3199 block->status.hw)
2cb681b6
ML
3200 continue;
3201
895bd048
JZ
3202 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3203 r = block->version->funcs->resume(adev);
3204 else
3205 r = block->version->funcs->hw_init(adev);
3206
0aaeefcc 3207 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3208 if (r)
3209 return r;
482f0e53 3210 block->status.hw = true;
a90ad3c2
ML
3211 }
3212 }
3213
3214 return 0;
3215}
3216
e3ecdffa
AD
3217/**
3218 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3219 *
3220 * @adev: amdgpu_device pointer
3221 *
3222 * First resume function for hardware IPs. The list of all the hardware
3223 * IPs that make up the asic is walked and the resume callbacks are run for
3224 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3225 * after a suspend and updates the software state as necessary. This
3226 * function is also used for restoring the GPU after a GPU reset.
3227 * Returns 0 on success, negative error code on failure.
3228 */
06ec9070 3229static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3230{
3231 int i, r;
3232
a90ad3c2 3233 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3234 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3235 continue;
a90ad3c2 3236 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3237 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3238 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3239 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3240
fcf0649f
CZ
3241 r = adev->ip_blocks[i].version->funcs->resume(adev);
3242 if (r) {
3243 DRM_ERROR("resume of IP block <%s> failed %d\n",
3244 adev->ip_blocks[i].version->funcs->name, r);
3245 return r;
3246 }
482f0e53 3247 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3248 }
3249 }
3250
3251 return 0;
3252}
3253
e3ecdffa
AD
3254/**
3255 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3256 *
3257 * @adev: amdgpu_device pointer
3258 *
3259 * Second resume function for hardware IPs. The list of all the hardware
3260 * IPs that make up the asic is walked and the resume callbacks are run for
3261 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3262 * functional state after a suspend and updates the software state as
3263 * necessary. This function is also used for restoring the GPU after a GPU
3264 * reset.
3265 * Returns 0 on success, negative error code on failure.
3266 */
06ec9070 3267static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3268{
3269 int i, r;
3270
3271 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3272 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3273 continue;
fcf0649f 3274 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3275 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3276 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3277 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3278 continue;
a1255107 3279 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3280 if (r) {
a1255107
AD
3281 DRM_ERROR("resume of IP block <%s> failed %d\n",
3282 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3283 return r;
2c1a2784 3284 }
482f0e53 3285 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3286 }
3287
3288 return 0;
3289}
3290
e3ecdffa
AD
3291/**
3292 * amdgpu_device_ip_resume - run resume for hardware IPs
3293 *
3294 * @adev: amdgpu_device pointer
3295 *
3296 * Main resume function for hardware IPs. The hardware IPs
3297 * are split into two resume functions because they are
3298 * also used in recovering from a GPU reset and some additional
3299 * steps need to be taken between them. In this case (S3/S4) they are
3300 * run sequentially.
3301 * Returns 0 on success, negative error code on failure.
3302 */
06ec9070 3303static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3304{
3305 int r;
3306
f2206741
AL
3307 if (!adev->in_s0ix) {
3308 r = amdgpu_amdkfd_resume_iommu(adev);
3309 if (r)
3310 return r;
3311 }
9cec53c1 3312
06ec9070 3313 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3314 if (r)
3315 return r;
7a3e0bb2
RZ
3316
3317 r = amdgpu_device_fw_loading(adev);
3318 if (r)
3319 return r;
3320
06ec9070 3321 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3322
3323 return r;
3324}
3325
e3ecdffa
AD
3326/**
3327 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3328 *
3329 * @adev: amdgpu_device pointer
3330 *
3331 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3332 */
4e99a44e 3333static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3334{
6867e1b5
ML
3335 if (amdgpu_sriov_vf(adev)) {
3336 if (adev->is_atom_fw) {
58ff791a 3337 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3338 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3339 } else {
3340 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3341 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3342 }
3343
3344 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3345 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3346 }
048765ad
AR
3347}
3348
e3ecdffa
AD
3349/**
3350 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3351 *
3352 * @asic_type: AMD asic type
3353 *
3354 * Check if there is DC (new modesetting infrastructure) support for an asic.
3355 * returns true if DC has support, false if not.
3356 */
4562236b
HW
3357bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3358{
3359 switch (asic_type) {
0637d417
AD
3360#ifdef CONFIG_DRM_AMDGPU_SI
3361 case CHIP_HAINAN:
3362#endif
3363 case CHIP_TOPAZ:
3364 /* chips with no display hardware */
3365 return false;
4562236b 3366#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3367 case CHIP_TAHITI:
3368 case CHIP_PITCAIRN:
3369 case CHIP_VERDE:
3370 case CHIP_OLAND:
2d32ffd6
AD
3371 /*
3372 * We have systems in the wild with these ASICs that require
3373 * LVDS and VGA support which is not supported with DC.
3374 *
3375 * Fallback to the non-DC driver here by default so as not to
3376 * cause regressions.
3377 */
3378#if defined(CONFIG_DRM_AMD_DC_SI)
3379 return amdgpu_dc > 0;
3380#else
3381 return false;
64200c46 3382#endif
4562236b 3383 case CHIP_BONAIRE:
0d6fbccb 3384 case CHIP_KAVERI:
367e6687
AD
3385 case CHIP_KABINI:
3386 case CHIP_MULLINS:
d9fda248
HW
3387 /*
3388 * We have systems in the wild with these ASICs that require
b5a0168e 3389 * VGA support which is not supported with DC.
d9fda248
HW
3390 *
3391 * Fallback to the non-DC driver here by default so as not to
3392 * cause regressions.
3393 */
3394 return amdgpu_dc > 0;
f7f12b25 3395 default:
fd187853 3396 return amdgpu_dc != 0;
f7f12b25 3397#else
4562236b 3398 default:
93b09a9a 3399 if (amdgpu_dc > 0)
044a48f4 3400 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3401 "but isn't supported by ASIC, ignoring\n");
4562236b 3402 return false;
f7f12b25 3403#endif
4562236b
HW
3404 }
3405}
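/*
 * Summarizing the checks above: amdgpu_dc is the amdgpu.dc module parameter
 * (default -1). The legacy ASICs listed (e.g. Kaveri, Oland) fall back to the
 * non-DC path unless DC is explicitly requested with amdgpu.dc=1, while newer
 * ASICs use DC unless it is explicitly disabled with amdgpu.dc=0.
 */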
3406
3407/**
3408 * amdgpu_device_has_dc_support - check if dc is supported
3409 *
982a820b 3410 * @adev: amdgpu_device pointer
4562236b
HW
3411 *
3412 * Returns true for supported, false for not supported
3413 */
3414bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3415{
25263da3 3416 if (adev->enable_virtual_display ||
abaf210c 3417 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3418 return false;
3419
4562236b
HW
3420 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3421}
3422
d4535e2c
AG
3423static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3424{
3425 struct amdgpu_device *adev =
3426 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3427 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3428
c6a6e2db
AG
3429 /* It's a bug to not have a hive within this function */
3430 if (WARN_ON(!hive))
3431 return;
3432
3433 /*
3434 * Use task barrier to synchronize all xgmi reset works across the
3435 * hive. task_barrier_enter and task_barrier_exit will block
3436 * until all the threads running the xgmi reset works reach
3437 * those points. task_barrier_full will do both blocks.
3438 */
3439 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3440
3441 task_barrier_enter(&hive->tb);
4a580877 3442 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3443
3444 if (adev->asic_reset_res)
3445 goto fail;
3446
3447 task_barrier_exit(&hive->tb);
4a580877 3448 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3449
3450 if (adev->asic_reset_res)
3451 goto fail;
43c4d576 3452
5e67bba3 3453 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3454 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3455 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3456 } else {
3457
3458 task_barrier_full(&hive->tb);
3459 adev->asic_reset_res = amdgpu_asic_reset(adev);
3460 }
ce316fa5 3461
c6a6e2db 3462fail:
d4535e2c 3463 if (adev->asic_reset_res)
fed184e9 3464 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3465 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3466 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3467}
3468
71f98027
AD
3469static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3470{
3471 char *input = amdgpu_lockup_timeout;
3472 char *timeout_setting = NULL;
3473 int index = 0;
3474 long timeout;
3475 int ret = 0;
3476
3477 /*
67387dfe
AD
3478 * By default the timeout for non-compute jobs is 10000 ms
3479 * and 60000 ms for compute jobs.
71f98027 3480 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3481 * jobs is 60000 ms by default.
71f98027
AD
3482 */
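	/*
	 * Illustrative example (hypothetical values) of the comma-separated
	 * format parsed below: amdgpu.lockup_timeout=10000,60000,10000,10000
	 * sets the gfx, compute, sdma and video timeouts (in ms) in that
	 * order, while a single value applies to all non-compute jobs.
	 */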
3483 adev->gfx_timeout = msecs_to_jiffies(10000);
3484 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3485 if (amdgpu_sriov_vf(adev))
3486 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3487 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3488 else
67387dfe 3489 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3490
f440ff44 3491 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3492 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3493 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3494 ret = kstrtol(timeout_setting, 0, &timeout);
3495 if (ret)
3496 return ret;
3497
3498 if (timeout == 0) {
3499 index++;
3500 continue;
3501 } else if (timeout < 0) {
3502 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3503 dev_warn(adev->dev, "lockup timeout disabled");
3504 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3505 } else {
3506 timeout = msecs_to_jiffies(timeout);
3507 }
3508
3509 switch (index++) {
3510 case 0:
3511 adev->gfx_timeout = timeout;
3512 break;
3513 case 1:
3514 adev->compute_timeout = timeout;
3515 break;
3516 case 2:
3517 adev->sdma_timeout = timeout;
3518 break;
3519 case 3:
3520 adev->video_timeout = timeout;
3521 break;
3522 default:
3523 break;
3524 }
3525 }
3526 /*
3527 * There is only one value specified and
3528 * it should apply to all non-compute jobs.
3529 */
bcccee89 3530 if (index == 1) {
71f98027 3531 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3532 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3533 adev->compute_timeout = adev->gfx_timeout;
3534 }
71f98027
AD
3535 }
3536
3537 return ret;
3538}
d4535e2c 3539
4a74c38c
PY
3540/**
3541 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3542 *
3543 * @adev: amdgpu_device pointer
3544 *
3546 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3546 */
3547static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3548{
3549 struct iommu_domain *domain;
3550
3551 domain = iommu_get_domain_for_dev(adev->dev);
3552 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3553 adev->ram_is_direct_mapped = true;
3554}
3555
77f3a5cd
ND
3556static const struct attribute *amdgpu_dev_attributes[] = {
3557 &dev_attr_product_name.attr,
3558 &dev_attr_product_number.attr,
3559 &dev_attr_serial_number.attr,
3560 &dev_attr_pcie_replay_count.attr,
3561 NULL
3562};
3563
d38ceaf9
AD
3564/**
3565 * amdgpu_device_init - initialize the driver
3566 *
3567 * @adev: amdgpu_device pointer
d38ceaf9
AD
3568 * @flags: driver flags
3569 *
3570 * Initializes the driver info and hw (all asics).
3571 * Returns 0 for success or an error on failure.
3572 * Called at driver startup.
3573 */
3574int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3575 uint32_t flags)
3576{
8aba21b7
LT
3577 struct drm_device *ddev = adev_to_drm(adev);
3578 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3579 int r, i;
b98c6299 3580 bool px = false;
95844d20 3581 u32 max_MBps;
59e9fff1 3582 int tmp;
d38ceaf9
AD
3583
3584 adev->shutdown = false;
d38ceaf9 3585 adev->flags = flags;
4e66d7d2
YZ
3586
3587 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3588 adev->asic_type = amdgpu_force_asic_type;
3589 else
3590 adev->asic_type = flags & AMD_ASIC_MASK;
3591
d38ceaf9 3592 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3593 if (amdgpu_emu_mode == 1)
8bdab6bb 3594 adev->usec_timeout *= 10;
770d13b1 3595 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3596 adev->accel_working = false;
3597 adev->num_rings = 0;
68ce8b24 3598 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3599 adev->mman.buffer_funcs = NULL;
3600 adev->mman.buffer_funcs_ring = NULL;
3601 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3602 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3603 adev->gmc.gmc_funcs = NULL;
7bd939d0 3604 adev->harvest_ip_mask = 0x0;
f54d1867 3605 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3606 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3607
3608 adev->smc_rreg = &amdgpu_invalid_rreg;
3609 adev->smc_wreg = &amdgpu_invalid_wreg;
3610 adev->pcie_rreg = &amdgpu_invalid_rreg;
3611 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3612 adev->pciep_rreg = &amdgpu_invalid_rreg;
3613 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3614 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3615 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3616 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3617 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3618 adev->didt_rreg = &amdgpu_invalid_rreg;
3619 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3620 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3621 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3622 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3623 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3624
3e39ab90
AD
3625 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3626 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3627 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3628
3629 /* mutex initializations are all done here so we
3630 * can recall functions without having locking issues */
0e5ca0d1 3631 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3632 mutex_init(&adev->pm.mutex);
3633 mutex_init(&adev->gfx.gpu_clock_mutex);
3634 mutex_init(&adev->srbm_mutex);
b8866c26 3635 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3636 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3637 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3638 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3639 mutex_init(&adev->mn_lock);
e23b74aa 3640 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3641 hash_init(adev->mn_hash);
32eaeae0 3642 mutex_init(&adev->psp.mutex);
bd052211 3643 mutex_init(&adev->notifier_lock);
8cda7a4f 3644 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3645 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3646
ab3b9de6 3647 amdgpu_device_init_apu_flags(adev);
9f6a7857 3648
912dfc84
EQ
3649 r = amdgpu_device_check_arguments(adev);
3650 if (r)
3651 return r;
d38ceaf9 3652
d38ceaf9
AD
3653 spin_lock_init(&adev->mmio_idx_lock);
3654 spin_lock_init(&adev->smc_idx_lock);
3655 spin_lock_init(&adev->pcie_idx_lock);
3656 spin_lock_init(&adev->uvd_ctx_idx_lock);
3657 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3658 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3659 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3660 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3661 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3662
0c4e7fa5
CZ
3663 INIT_LIST_HEAD(&adev->shadow_list);
3664 mutex_init(&adev->shadow_list_lock);
3665
655ce9cb 3666 INIT_LIST_HEAD(&adev->reset_list);
3667
6492e1b0 3668 INIT_LIST_HEAD(&adev->ras_list);
3669
beff74bc
AD
3670 INIT_DELAYED_WORK(&adev->delayed_init_work,
3671 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3672 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3673 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3674
d4535e2c
AG
3675 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3676
d23ee13f 3677 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3678 adev->gfx.gfx_off_residency = 0;
3679 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3680 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3681
b265bdbd
EQ
3682 atomic_set(&adev->throttling_logging_enabled, 1);
3683 /*
3684 * If throttling continues, logging will be performed every minute
3685 * to avoid log flooding. "-1" is subtracted since the thermal
3686 * throttling interrupt comes every second. Thus, the total logging
3687 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3688 * for throttling interrupt) = 60 seconds.
3689 */
3690 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3691 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3692
0fa49558
AX
3693 /* Registers mapping */
3694 /* TODO: block userspace mapping of io register */
da69c161
KW
3695 if (adev->asic_type >= CHIP_BONAIRE) {
3696 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3697 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3698 } else {
3699 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3700 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3701 }
d38ceaf9 3702
6c08e0ef
EQ
3703 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3704 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3705
d38ceaf9
AD
3706 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3707 if (adev->rmmio == NULL) {
3708 return -ENOMEM;
3709 }
3710 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3711 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3712
5494d864
AD
3713 amdgpu_device_get_pcie_info(adev);
3714
b239c017
JX
3715 if (amdgpu_mcbp)
3716 DRM_INFO("MCBP is enabled\n");
3717
436afdfa
PY
3718 /*
3719 * The reset domain needs to be present early, before the XGMI hive is
3720 * discovered (if any) and initialized, so that the reset sem and in_gpu_reset
3721 * flag can be used early on during init and before calling RREG32.
3722 */
3723 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3724 if (!adev->reset_domain)
3725 return -ENOMEM;
3726
3aa0115d
ML
3727 /* detect hw virtualization here */
3728 amdgpu_detect_virtualization(adev);
3729
dffa11b4
ML
3730 r = amdgpu_device_get_job_timeout_settings(adev);
3731 if (r) {
3732 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3733 return r;
a190d1c7
XY
3734 }
3735
d38ceaf9 3736 /* early init functions */
06ec9070 3737 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3738 if (r)
4ef87d8f 3739 return r;
d38ceaf9 3740
b7cdb41e
ML
3741 /* Get rid of things like offb */
3742 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3743 if (r)
3744 return r;
3745
4d33e704
SK
3746 /* Enable TMZ based on IP_VERSION */
3747 amdgpu_gmc_tmz_set(adev);
3748
957b0787 3749 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3750 /* Need to get xgmi info early to decide the reset behavior*/
3751 if (adev->gmc.xgmi.supported) {
3752 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3753 if (r)
3754 return r;
3755 }
3756
8e6d0b69 3757 /* enable PCIE atomic ops */
3758 if (amdgpu_sriov_vf(adev))
3759 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
e15c9d06 3760 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
8e6d0b69 3761 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
3762 /* APUs with gfx9 and onward don't rely on PCIe atomics; their
3763 * internal path natively supports atomics, so set have_atomics_support to true.
3764 */
3765 else if ((adev->flags & AMD_IS_APU) &&
3766 (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)))
3767 adev->have_atomics_support = true;
8e6d0b69 3768 else
3769 adev->have_atomics_support =
3770 !pci_enable_atomic_ops_to_root(adev->pdev,
3771 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3772 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3773 if (!adev->have_atomics_support)
3774 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
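	/*
	 * Note (summarizing the checks above): both 32-bit and 64-bit AtomicOp
	 * completer support (PCI_EXP_DEVCAP2_ATOMIC_COMP32/COMP64) is required
	 * on the path to the root port; on SR-IOV the capability is read from
	 * the pf2vf exchange data instead of probing the PCIe hierarchy.
	 */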
3775
6585661d
OZ
3776 /* doorbell bar mapping and doorbell index init */
3777 amdgpu_device_doorbell_init(adev);
3778
9475a943
SL
3779 if (amdgpu_emu_mode == 1) {
3780 /* post the asic on emulation mode */
3781 emu_soc_asic_init(adev);
bfca0289 3782 goto fence_driver_init;
9475a943 3783 }
bfca0289 3784
04442bf7
LL
3785 amdgpu_reset_init(adev);
3786
4e99a44e
ML
3787 /* detect if we are running with an SR-IOV vbios */
3788 amdgpu_device_detect_sriov_bios(adev);
048765ad 3789
95e8e59e
AD
3790 /* check if we need to reset the asic
3791 * E.g., driver was not cleanly unloaded previously, etc.
3792 */
f14899fd 3793 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3794 if (adev->gmc.xgmi.num_physical_nodes) {
3795 dev_info(adev->dev, "Pending hive reset.\n");
3796 adev->gmc.xgmi.pending_reset = true;
3797 /* Only need to init necessary block for SMU to handle the reset */
3798 for (i = 0; i < adev->num_ip_blocks; i++) {
3799 if (!adev->ip_blocks[i].status.valid)
3800 continue;
3801 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3802 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3803 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3804 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3805 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3806 adev->ip_blocks[i].version->funcs->name);
3807 adev->ip_blocks[i].status.hw = true;
3808 }
3809 }
3810 } else {
59e9fff1 3811 tmp = amdgpu_reset_method;
3812 /* It should do a default reset when loading or reloading the driver,
3813 * regardless of the module parameter reset_method.
3814 */
3815 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3816 r = amdgpu_asic_reset(adev);
59e9fff1 3817 amdgpu_reset_method = tmp;
e3c1b071 3818 if (r) {
3819 dev_err(adev->dev, "asic reset on init failed\n");
3820 goto failed;
3821 }
95e8e59e
AD
3822 }
3823 }
3824
d38ceaf9 3825 /* Post card if necessary */
39c640c0 3826 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3827 if (!adev->bios) {
bec86378 3828 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3829 r = -EINVAL;
3830 goto failed;
d38ceaf9 3831 }
bec86378 3832 DRM_INFO("GPU posting now...\n");
4d2997ab 3833 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3834 if (r) {
3835 dev_err(adev->dev, "gpu post error!\n");
3836 goto failed;
3837 }
d38ceaf9
AD
3838 }
3839
88b64e95
AD
3840 if (adev->is_atom_fw) {
3841 /* Initialize clocks */
3842 r = amdgpu_atomfirmware_get_clock_info(adev);
3843 if (r) {
3844 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3845 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3846 goto failed;
3847 }
3848 } else {
a5bde2f9
AD
3849 /* Initialize clocks */
3850 r = amdgpu_atombios_get_clock_info(adev);
3851 if (r) {
3852 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3853 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3854 goto failed;
a5bde2f9
AD
3855 }
3856 /* init i2c buses */
4562236b
HW
3857 if (!amdgpu_device_has_dc_support(adev))
3858 amdgpu_atombios_i2c_init(adev);
2c1a2784 3859 }
d38ceaf9 3860
bfca0289 3861fence_driver_init:
d38ceaf9 3862 /* Fence driver */
067f44c8 3863 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3864 if (r) {
067f44c8 3865 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3866 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3867 goto failed;
2c1a2784 3868 }
d38ceaf9
AD
3869
3870 /* init the mode config */
4a580877 3871 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3872
06ec9070 3873 r = amdgpu_device_ip_init(adev);
d38ceaf9 3874 if (r) {
06ec9070 3875 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3876 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3877 goto release_ras_con;
d38ceaf9
AD
3878 }
3879
8d35a259
LG
3880 amdgpu_fence_driver_hw_init(adev);
3881
d69b8971
YZ
3882 dev_info(adev->dev,
3883 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3884 adev->gfx.config.max_shader_engines,
3885 adev->gfx.config.max_sh_per_se,
3886 adev->gfx.config.max_cu_per_sh,
3887 adev->gfx.cu_info.number);
3888
d38ceaf9
AD
3889 adev->accel_working = true;
3890
e59c0205
AX
3891 amdgpu_vm_check_compute_bug(adev);
3892
95844d20
MO
3893 /* Initialize the buffer migration limit. */
3894 if (amdgpu_moverate >= 0)
3895 max_MBps = amdgpu_moverate;
3896 else
3897 max_MBps = 8; /* Allow 8 MB/s. */
3898 /* Get a log2 for easy divisions. */
3899 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
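	/* For example, the default 8 MB/s above gives log2_max_MBps = ilog2(8) = 3. */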
3900
d2f52ac8 3901 r = amdgpu_pm_sysfs_init(adev);
53e9d836
GC
3902 if (r)
3903 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
d2f52ac8 3904
5bb23532 3905 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3906 if (r) {
3907 adev->ucode_sysfs_en = false;
5bb23532 3908 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3909 } else
3910 adev->ucode_sysfs_en = true;
5bb23532 3911
8424f2cc
LG
3912 r = amdgpu_psp_sysfs_init(adev);
3913 if (r) {
3914 adev->psp_sysfs_en = false;
3915 if (!amdgpu_sriov_vf(adev))
3916 DRM_ERROR("Creating psp sysfs failed\n");
3917 } else
3918 adev->psp_sysfs_en = true;
3919
b0adca4d
EQ
3920 /*
3921 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3922 * Otherwise the mgpu fan boost feature will be skipped because the
3923 * gpu instance count would be too low.
3924 */
3925 amdgpu_register_gpu_instance(adev);
3926
d38ceaf9
AD
3927 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3928 * explicit gating rather than handling it automatically.
3929 */
e3c1b071 3930 if (!adev->gmc.xgmi.pending_reset) {
3931 r = amdgpu_device_ip_late_init(adev);
3932 if (r) {
3933 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3934 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3935 goto release_ras_con;
e3c1b071 3936 }
3937 /* must succeed. */
3938 amdgpu_ras_resume(adev);
3939 queue_delayed_work(system_wq, &adev->delayed_init_work,
3940 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3941 }
d38ceaf9 3942
38eecbe0
CL
3943 if (amdgpu_sriov_vf(adev)) {
3944 amdgpu_virt_release_full_gpu(adev, true);
2c738637 3945 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 3946 }
2c738637 3947
77f3a5cd 3948 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3949 if (r)
77f3a5cd 3950 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3951
d155bef0
AB
3952 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3953 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3954 if (r)
3955 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3956
c1dd4aa6
AG
3957 /* Have stored pci confspace at hand for restore in sudden PCI error */
3958 if (amdgpu_device_cache_pci_state(adev->pdev))
3959 pci_restore_state(pdev);
3960
8c3dd61c
KHF
3961 /* if we have more than one VGA card, then disable the amdgpu VGA resources */
3962 /* this will fail for cards that aren't VGA class devices, just
3963 * ignore it */
3964 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 3965 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 3966
d37a3929
OC
3967 px = amdgpu_device_supports_px(ddev);
3968
3969 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
3970 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
3971 vga_switcheroo_register_client(adev->pdev,
3972 &amdgpu_switcheroo_ops, px);
d37a3929
OC
3973
3974 if (px)
8c3dd61c 3975 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 3976
e3c1b071 3977 if (adev->gmc.xgmi.pending_reset)
3978 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3979 msecs_to_jiffies(AMDGPU_RESUME_MS));
3980
4a74c38c
PY
3981 amdgpu_device_check_iommu_direct_map(adev);
3982
d38ceaf9 3983 return 0;
83ba126a 3984
970fd197 3985release_ras_con:
38eecbe0
CL
3986 if (amdgpu_sriov_vf(adev))
3987 amdgpu_virt_release_full_gpu(adev, true);
3988
3989 /* failed in exclusive mode due to timeout */
3990 if (amdgpu_sriov_vf(adev) &&
3991 !amdgpu_sriov_runtime(adev) &&
3992 amdgpu_virt_mmio_blocked(adev) &&
3993 !amdgpu_virt_wait_reset(adev)) {
3994 dev_err(adev->dev, "VF exclusive mode timeout\n");
3995 /* Don't send request since VF is inactive. */
3996 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3997 adev->virt.ops = NULL;
3998 r = -EAGAIN;
3999 }
970fd197
SY
4000 amdgpu_release_ras_context(adev);
4001
83ba126a 4002failed:
89041940 4003 amdgpu_vf_error_trans_all(adev);
8840a387 4004
83ba126a 4005 return r;
d38ceaf9
AD
4006}
4007
07775fc1
AG
4008static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4009{
62d5f9f7 4010
07775fc1
AG
4011 /* Clear all CPU mappings pointing to this device */
4012 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4013
4014 /* Unmap all mapped bars - Doorbell, registers and VRAM */
4015 amdgpu_device_doorbell_fini(adev);
4016
4017 iounmap(adev->rmmio);
4018 adev->rmmio = NULL;
4019 if (adev->mman.aper_base_kaddr)
4020 iounmap(adev->mman.aper_base_kaddr);
4021 adev->mman.aper_base_kaddr = NULL;
4022
4023 /* Memory manager related */
4024 if (!adev->gmc.xgmi.connected_to_cpu) {
4025 arch_phys_wc_del(adev->gmc.vram_mtrr);
4026 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4027 }
4028}
4029
d38ceaf9 4030/**
bbe04dec 4031 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4032 *
4033 * @adev: amdgpu_device pointer
4034 *
4035 * Tear down the driver info (all asics).
4036 * Called at driver shutdown.
4037 */
72c8c97b 4038void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4039{
aac89168 4040 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4041 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4042 adev->shutdown = true;
9f875167 4043
752c683d
ML
4044 /* make sure IB tests have finished before entering exclusive mode
4045 * to avoid preemption on IB test
4046 */
519b8b76 4047 if (amdgpu_sriov_vf(adev)) {
752c683d 4048 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4049 amdgpu_virt_fini_data_exchange(adev);
4050 }
752c683d 4051
e5b03032
ML
4052 /* disable all interrupts */
4053 amdgpu_irq_disable_all(adev);
47fc644f 4054 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4055 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4056 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4057 else
4a580877 4058 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4059 }
8d35a259 4060 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4061
cd3a8a59 4062 if (adev->mman.initialized)
9bff18d1 4063 drain_workqueue(adev->mman.bdev.wq);
98f56188 4064
53e9d836 4065 if (adev->pm.sysfs_initialized)
7c868b59 4066 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4067 if (adev->ucode_sysfs_en)
4068 amdgpu_ucode_sysfs_fini(adev);
8424f2cc
LG
4069 if (adev->psp_sysfs_en)
4070 amdgpu_psp_sysfs_fini(adev);
72c8c97b
AG
4071 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4072
232d1d43
SY
4073 /* disable ras feature must before hw fini */
4074 amdgpu_ras_pre_fini(adev);
4075
e9669fb7 4076 amdgpu_device_ip_fini_early(adev);
d10d0daa 4077
a3848df6
YW
4078 amdgpu_irq_fini_hw(adev);
4079
b6fd6e0f
SK
4080 if (adev->mman.initialized)
4081 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4082
d10d0daa 4083 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4084
39934d3e
VP
4085 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4086 amdgpu_device_unmap_mmio(adev);
87172e89 4087
72c8c97b
AG
4088}
4089
4090void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4091{
62d5f9f7 4092 int idx;
d37a3929 4093 bool px;
62d5f9f7 4094
8d35a259 4095 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4096 amdgpu_device_ip_fini(adev);
b31d3063 4097 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4098 adev->accel_working = false;
68ce8b24 4099 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4100
4101 amdgpu_reset_fini(adev);
4102
d38ceaf9 4103 /* free i2c buses */
4562236b
HW
4104 if (!amdgpu_device_has_dc_support(adev))
4105 amdgpu_i2c_fini(adev);
bfca0289
SL
4106
4107 if (amdgpu_emu_mode != 1)
4108 amdgpu_atombios_fini(adev);
4109
d38ceaf9
AD
4110 kfree(adev->bios);
4111 adev->bios = NULL;
d37a3929
OC
4112
4113 px = amdgpu_device_supports_px(adev_to_drm(adev));
4114
4115 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4116 apple_gmux_detect(NULL, NULL)))
84c8b22e 4117 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4118
4119 if (px)
83ba126a 4120 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4121
38d6be81 4122 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4123 vga_client_unregister(adev->pdev);
e9bc1bf7 4124
62d5f9f7
LS
4125 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4126
4127 iounmap(adev->rmmio);
4128 adev->rmmio = NULL;
4129 amdgpu_device_doorbell_fini(adev);
4130 drm_dev_exit(idx);
4131 }
4132
d155bef0
AB
4133 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4134 amdgpu_pmu_fini(adev);
72de33f8 4135 if (adev->mman.discovery_bin)
a190d1c7 4136 amdgpu_discovery_fini(adev);
72c8c97b 4137
cfbb6b00
AG
4138 amdgpu_reset_put_reset_domain(adev->reset_domain);
4139 adev->reset_domain = NULL;
4140
72c8c97b
AG
4141 kfree(adev->pci_state);
4142
d38ceaf9
AD
4143}
4144
58144d28
ND
4145/**
4146 * amdgpu_device_evict_resources - evict device resources
4147 * @adev: amdgpu device object
4148 *
4149 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4150 * of the vram memory type. Mainly used for evicting device resources
4151 * at suspend time.
4152 *
4153 */
7863c155 4154static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4155{
7863c155
ML
4156 int ret;
4157
e53d9665
ML
4158 /* No need to evict vram on APUs for suspend to ram or s2idle */
4159 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4160 return 0;
58144d28 4161
7863c155
ML
4162 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4163 if (ret)
58144d28 4164 DRM_WARN("evicting device resources failed\n");
7863c155 4165 return ret;
58144d28 4166}
d38ceaf9
AD
4167
4168/*
4169 * Suspend & resume.
4170 */
4171/**
810ddc3a 4172 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4173 *
87e3f136 4174 * @dev: drm dev pointer
87e3f136 4175 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
4176 *
4177 * Puts the hw in the suspend state (all asics).
4178 * Returns 0 for success or an error on failure.
4179 * Called at driver suspend.
4180 */
de185019 4181int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4182{
a2e15b0e 4183 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4184 int r = 0;
d38ceaf9 4185
d38ceaf9
AD
4186 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4187 return 0;
4188
44779b43 4189 adev->in_suspend = true;
3fa8f89d 4190
47ea2076
SF
4191 /* Evict the majority of BOs before grabbing the full access */
4192 r = amdgpu_device_evict_resources(adev);
4193 if (r)
4194 return r;
4195
d7274ec7
BZ
4196 if (amdgpu_sriov_vf(adev)) {
4197 amdgpu_virt_fini_data_exchange(adev);
4198 r = amdgpu_virt_request_full_gpu(adev, false);
4199 if (r)
4200 return r;
4201 }
4202
3fa8f89d
S
4203 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4204 DRM_WARN("smart shift update failed\n");
4205
5f818173 4206 if (fbcon)
087451f3 4207 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4208
beff74bc 4209 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 4210
5e6932fe 4211 amdgpu_ras_suspend(adev);
4212
2196927b 4213 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4214
c004d44e 4215 if (!adev->in_s0ix)
5d3a2d95 4216 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4217
7863c155
ML
4218 r = amdgpu_device_evict_resources(adev);
4219 if (r)
4220 return r;
d38ceaf9 4221
8d35a259 4222 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4223
2196927b 4224 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4225
d7274ec7
BZ
4226 if (amdgpu_sriov_vf(adev))
4227 amdgpu_virt_release_full_gpu(adev, false);
4228
d38ceaf9
AD
4229 return 0;
4230}
4231
4232/**
810ddc3a 4233 * amdgpu_device_resume - initiate device resume
d38ceaf9 4234 *
87e3f136 4235 * @dev: drm dev pointer
87e3f136 4236 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
4237 *
4238 * Bring the hw back to operating state (all asics).
4239 * Returns 0 for success or an error on failure.
4240 * Called at driver resume.
4241 */
de185019 4242int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4243{
1348969a 4244 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4245 int r = 0;
d38ceaf9 4246
d7274ec7
BZ
4247 if (amdgpu_sriov_vf(adev)) {
4248 r = amdgpu_virt_request_full_gpu(adev, true);
4249 if (r)
4250 return r;
4251 }
4252
d38ceaf9
AD
4253 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4254 return 0;
4255
62498733 4256 if (adev->in_s0ix)
bc143d8b 4257 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4258
d38ceaf9 4259 /* post card */
39c640c0 4260 if (amdgpu_device_need_post(adev)) {
4d2997ab 4261 r = amdgpu_device_asic_init(adev);
74b0b157 4262 if (r)
aac89168 4263 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4264 }
d38ceaf9 4265
06ec9070 4266 r = amdgpu_device_ip_resume(adev);
d7274ec7 4267
e6707218 4268 if (r) {
aac89168 4269 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4270 goto exit;
e6707218 4271 }
8d35a259 4272 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4273
06ec9070 4274 r = amdgpu_device_ip_late_init(adev);
03161a6e 4275 if (r)
3c22c1ea 4276 goto exit;
d38ceaf9 4277
beff74bc
AD
4278 queue_delayed_work(system_wq, &adev->delayed_init_work,
4279 msecs_to_jiffies(AMDGPU_RESUME_MS));
4280
c004d44e 4281 if (!adev->in_s0ix) {
5d3a2d95
AD
4282 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4283 if (r)
3c22c1ea 4284 goto exit;
5d3a2d95 4285 }
756e6880 4286
3c22c1ea
SF
4287exit:
4288 if (amdgpu_sriov_vf(adev)) {
4289 amdgpu_virt_init_data_exchange(adev);
4290 amdgpu_virt_release_full_gpu(adev, true);
4291 }
4292
4293 if (r)
4294 return r;
4295
96a5d8d4 4296 /* Make sure IB tests flushed */
beff74bc 4297 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4298
a2e15b0e 4299 if (fbcon)
087451f3 4300 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4301
5e6932fe 4302 amdgpu_ras_resume(adev);
4303
d09ef243
AD
4304 if (adev->mode_info.num_crtc) {
4305 /*
4306 * Most of the connector probing functions try to acquire runtime pm
4307 * refs to ensure that the GPU is powered on when connector polling is
4308 * performed. Since we're calling this from a runtime PM callback,
4309 * trying to acquire rpm refs will cause us to deadlock.
4310 *
4311 * Since we're guaranteed to be holding the rpm lock, it's safe to
4312 * temporarily disable the rpm helpers so this doesn't deadlock us.
4313 */
23a1a9e5 4314#ifdef CONFIG_PM
d09ef243 4315 dev->dev->power.disable_depth++;
23a1a9e5 4316#endif
d09ef243
AD
4317 if (!adev->dc_enabled)
4318 drm_helper_hpd_irq_event(dev);
4319 else
4320 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4321#ifdef CONFIG_PM
d09ef243 4322 dev->dev->power.disable_depth--;
23a1a9e5 4323#endif
d09ef243 4324 }
44779b43
RZ
4325 adev->in_suspend = false;
4326
dc907c9d
JX
4327 if (adev->enable_mes)
4328 amdgpu_mes_self_test(adev);
4329
3fa8f89d
S
4330 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4331 DRM_WARN("smart shift update failed\n");
4332
4d3b9ae5 4333 return 0;
d38ceaf9
AD
4334}
4335
e3ecdffa
AD
4336/**
4337 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4338 *
4339 * @adev: amdgpu_device pointer
4340 *
4341 * The list of all the hardware IPs that make up the asic is walked and
4342 * the check_soft_reset callbacks are run. check_soft_reset determines
4343 * if the asic is still hung or not.
4344 * Returns true if any of the IPs are still in a hung state, false if not.
4345 */
06ec9070 4346static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4347{
4348 int i;
4349 bool asic_hang = false;
4350
f993d628
ML
4351 if (amdgpu_sriov_vf(adev))
4352 return true;
4353
8bc04c29
AD
4354 if (amdgpu_asic_need_full_reset(adev))
4355 return true;
4356
63fbf42f 4357 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4358 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4359 continue;
a1255107
AD
4360 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4361 adev->ip_blocks[i].status.hang =
4362 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4363 if (adev->ip_blocks[i].status.hang) {
aac89168 4364 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4365 asic_hang = true;
4366 }
4367 }
4368 return asic_hang;
4369}
4370
e3ecdffa
AD
4371/**
4372 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4373 *
4374 * @adev: amdgpu_device pointer
4375 *
4376 * The list of all the hardware IPs that make up the asic is walked and the
4377 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4378 * handles any IP specific hardware or software state changes that are
4379 * necessary for a soft reset to succeed.
4380 * Returns 0 on success, negative error code on failure.
4381 */
06ec9070 4382static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4383{
4384 int i, r = 0;
4385
4386 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4387 if (!adev->ip_blocks[i].status.valid)
d31a501e 4388 continue;
a1255107
AD
4389 if (adev->ip_blocks[i].status.hang &&
4390 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4391 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4392 if (r)
4393 return r;
4394 }
4395 }
4396
4397 return 0;
4398}
4399
e3ecdffa
AD
4400/**
4401 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4402 *
4403 * @adev: amdgpu_device pointer
4404 *
4405 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4406 * reset is necessary to recover.
4407 * Returns true if a full asic reset is required, false if not.
4408 */
06ec9070 4409static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4410{
da146d3b
AD
4411 int i;
4412
8bc04c29
AD
4413 if (amdgpu_asic_need_full_reset(adev))
4414 return true;
4415
da146d3b 4416 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4417 if (!adev->ip_blocks[i].status.valid)
da146d3b 4418 continue;
a1255107
AD
4419 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4420 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4421 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4422 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4423 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4424 if (adev->ip_blocks[i].status.hang) {
aac89168 4425 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4426 return true;
4427 }
4428 }
35d782fe
CZ
4429 }
4430 return false;
4431}
4432
e3ecdffa
AD
4433/**
4434 * amdgpu_device_ip_soft_reset - do a soft reset
4435 *
4436 * @adev: amdgpu_device pointer
4437 *
4438 * The list of all the hardware IPs that make up the asic is walked and the
4439 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4440 * IP specific hardware or software state changes that are necessary to soft
4441 * reset the IP.
4442 * Returns 0 on success, negative error code on failure.
4443 */
06ec9070 4444static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4445{
4446 int i, r = 0;
4447
4448 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4449 if (!adev->ip_blocks[i].status.valid)
35d782fe 4450 continue;
a1255107
AD
4451 if (adev->ip_blocks[i].status.hang &&
4452 adev->ip_blocks[i].version->funcs->soft_reset) {
4453 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4454 if (r)
4455 return r;
4456 }
4457 }
4458
4459 return 0;
4460}
4461
e3ecdffa
AD
4462/**
4463 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4464 *
4465 * @adev: amdgpu_device pointer
4466 *
4467 * The list of all the hardware IPs that make up the asic is walked and the
4468 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4469 * handles any IP specific hardware or software state changes that are
4470 * necessary after the IP has been soft reset.
4471 * Returns 0 on success, negative error code on failure.
4472 */
06ec9070 4473static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4474{
4475 int i, r = 0;
4476
4477 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4478 if (!adev->ip_blocks[i].status.valid)
35d782fe 4479 continue;
a1255107
AD
4480 if (adev->ip_blocks[i].status.hang &&
4481 adev->ip_blocks[i].version->funcs->post_soft_reset)
4482 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4483 if (r)
4484 return r;
4485 }
4486
4487 return 0;
4488}
4489
e3ecdffa 4490/**
c33adbc7 4491 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4492 *
4493 * @adev: amdgpu_device pointer
4494 *
4495 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4496 * restore things like GPUVM page tables after a GPU reset where
4497 * the contents of VRAM might be lost.
403009bf
CK
4498 *
4499 * Returns:
4500 * 0 on success, negative error code on failure.
e3ecdffa 4501 */
c33adbc7 4502static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4503{
c41d1cf6 4504 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4505 struct amdgpu_bo *shadow;
e18aaea7 4506 struct amdgpu_bo_vm *vmbo;
403009bf 4507 long r = 1, tmo;
c41d1cf6
ML
4508
4509 if (amdgpu_sriov_runtime(adev))
b045d3af 4510 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4511 else
4512 tmo = msecs_to_jiffies(100);
4513
aac89168 4514 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4515 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4516 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4517 /* If vm is compute context or adev is APU, shadow will be NULL */
4518 if (!vmbo->shadow)
4519 continue;
4520 shadow = vmbo->shadow;
4521
403009bf 4522 /* No need to recover an evicted BO */
d3116756
CK
4523 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4524 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4525 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4526 continue;
4527
4528 r = amdgpu_bo_restore_shadow(shadow, &next);
4529 if (r)
4530 break;
4531
c41d1cf6 4532 if (fence) {
1712fb1a 4533 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4534 dma_fence_put(fence);
4535 fence = next;
1712fb1a 4536 if (tmo == 0) {
4537 r = -ETIMEDOUT;
c41d1cf6 4538 break;
1712fb1a 4539 } else if (tmo < 0) {
4540 r = tmo;
4541 break;
4542 }
403009bf
CK
4543 } else {
4544 fence = next;
c41d1cf6 4545 }
c41d1cf6
ML
4546 }
4547 mutex_unlock(&adev->shadow_list_lock);
4548
403009bf
CK
4549 if (fence)
4550 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4551 dma_fence_put(fence);
4552
1712fb1a 4553 if (r < 0 || tmo <= 0) {
aac89168 4554 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4555 return -EIO;
4556 }
c41d1cf6 4557
aac89168 4558 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4559 return 0;
c41d1cf6
ML
4560}
4561
a90ad3c2 4562
e3ecdffa 4563/**
06ec9070 4564 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4565 *
982a820b 4566 * @adev: amdgpu_device pointer
87e3f136 4567 * @from_hypervisor: request from hypervisor
5740682e
ML
4568 *
4569 * Do a VF FLR and reinitialize the asic
3f48c681 4570 * return 0 means it succeeded, otherwise it failed
e3ecdffa
AD
4571 */
4572static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4573 bool from_hypervisor)
5740682e
ML
4574{
4575 int r;
a5f67c93 4576 struct amdgpu_hive_info *hive = NULL;
7258fa31 4577 int retry_limit = 0;
5740682e 4578
7258fa31 4579retry:
c004d44e 4580 amdgpu_amdkfd_pre_reset(adev);
428890a3 4581
5740682e
ML
4582 if (from_hypervisor)
4583 r = amdgpu_virt_request_full_gpu(adev, true);
4584 else
4585 r = amdgpu_virt_reset_gpu(adev);
4586 if (r)
4587 return r;
a90ad3c2
ML
4588
4589 /* Resume IP prior to SMC */
06ec9070 4590 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4591 if (r)
4592 goto error;
a90ad3c2 4593
c9ffa427 4594 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4595
7a3e0bb2
RZ
4596 r = amdgpu_device_fw_loading(adev);
4597 if (r)
4598 return r;
4599
a90ad3c2 4600 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4601 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4602 if (r)
4603 goto error;
a90ad3c2 4604
a5f67c93
ZL
4605 hive = amdgpu_get_xgmi_hive(adev);
4606 /* Update PSP FW topology after reset */
4607 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4608 r = amdgpu_xgmi_update_topology(hive, adev);
4609
4610 if (hive)
4611 amdgpu_put_xgmi_hive(hive);
4612
4613 if (!r) {
4614 amdgpu_irq_gpu_reset_resume_helper(adev);
4615 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4616
c004d44e 4617 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4618 }
a90ad3c2 4619
abc34253 4620error:
c41d1cf6 4621 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4622 amdgpu_inc_vram_lost(adev);
c33adbc7 4623 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4624 }
437f3e0b 4625 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4626
7258fa31
SK
4627 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4628 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4629 retry_limit++;
4630 goto retry;
4631 } else
4632 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4633 }
4634
a90ad3c2
ML
4635 return r;
4636}
4637
9a1cddd6 4638/**
4639 * amdgpu_device_has_job_running - check if there is any job in the pending list
4640 *
982a820b 4641 * @adev: amdgpu_device pointer
9a1cddd6 4642 *
4643 * Check if there is any job in the pending list of any ring.
4644 */
4645bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4646{
4647 int i;
4648 struct drm_sched_job *job;
4649
4650 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4651 struct amdgpu_ring *ring = adev->rings[i];
4652
4653 if (!ring || !ring->sched.thread)
4654 continue;
4655
4656 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4657 job = list_first_entry_or_null(&ring->sched.pending_list,
4658 struct drm_sched_job, list);
9a1cddd6 4659 spin_unlock(&ring->sched.job_list_lock);
4660 if (job)
4661 return true;
4662 }
4663 return false;
4664}
4665
12938fad
CK
4666/**
4667 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4668 *
982a820b 4669 * @adev: amdgpu_device pointer
12938fad
CK
4670 *
4671 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4672 * a hung GPU.
4673 */
4674bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4675{
12938fad 4676
3ba7b418
AG
4677 if (amdgpu_gpu_recovery == 0)
4678 goto disabled;
4679
1a11a65d
YC
4680 /* Skip soft reset check in fatal error mode */
4681 if (!amdgpu_ras_is_poison_mode_supported(adev))
4682 return true;
4683
3ba7b418
AG
4684 if (amdgpu_sriov_vf(adev))
4685 return true;
4686
4687 if (amdgpu_gpu_recovery == -1) {
4688 switch (adev->asic_type) {
b3523c45
AD
4689#ifdef CONFIG_DRM_AMDGPU_SI
4690 case CHIP_VERDE:
4691 case CHIP_TAHITI:
4692 case CHIP_PITCAIRN:
4693 case CHIP_OLAND:
4694 case CHIP_HAINAN:
4695#endif
4696#ifdef CONFIG_DRM_AMDGPU_CIK
4697 case CHIP_KAVERI:
4698 case CHIP_KABINI:
4699 case CHIP_MULLINS:
4700#endif
4701 case CHIP_CARRIZO:
4702 case CHIP_STONEY:
4703 case CHIP_CYAN_SKILLFISH:
3ba7b418 4704 goto disabled;
b3523c45
AD
4705 default:
4706 break;
3ba7b418 4707 }
12938fad
CK
4708 }
4709
4710 return true;
3ba7b418
AG
4711
4712disabled:
aac89168 4713 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4714 return false;
12938fad
CK
4715}
4716
5c03e584
FX
4717int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4718{
47fc644f
SS
4719 u32 i;
4720 int ret = 0;
5c03e584 4721
47fc644f 4722 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4723
47fc644f 4724 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4725
47fc644f
SS
4726 /* disable BM */
4727 pci_clear_master(adev->pdev);
5c03e584 4728
47fc644f 4729 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4730
47fc644f
SS
4731 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4732 dev_info(adev->dev, "GPU smu mode1 reset\n");
4733 ret = amdgpu_dpm_mode1_reset(adev);
4734 } else {
4735 dev_info(adev->dev, "GPU psp mode1 reset\n");
4736 ret = psp_gpu_reset(adev);
4737 }
5c03e584 4738
47fc644f
SS
4739 if (ret)
4740 dev_err(adev->dev, "GPU mode1 reset failed\n");
5c03e584 4741
47fc644f 4742 amdgpu_device_load_pci_state(adev->pdev);
5c03e584 4743
47fc644f
SS
4744 /* wait for asic to come out of reset */
4745 for (i = 0; i < adev->usec_timeout; i++) {
4746 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4747
47fc644f
SS
4748 if (memsize != 0xffffffff)
4749 break;
4750 udelay(1);
4751 }
5c03e584 4752
47fc644f
SS
4753 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4754 return ret;
5c03e584 4755}
5c6dd71e 4756
e3c1b071 4757int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4758 struct amdgpu_reset_context *reset_context)
26bc5340 4759{
5c1e6fa4 4760 int i, r = 0;
04442bf7
LL
4761 struct amdgpu_job *job = NULL;
4762 bool need_full_reset =
4763 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4764
4765 if (reset_context->reset_req_dev == adev)
4766 job = reset_context->job;
71182665 4767
b602ca5f
TZ
4768 if (amdgpu_sriov_vf(adev)) {
4769 /* stop the data exchange thread */
4770 amdgpu_virt_fini_data_exchange(adev);
4771 }
4772
9e225fb9
AG
4773 amdgpu_fence_driver_isr_toggle(adev, true);
4774
71182665 4775 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4776 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4777 struct amdgpu_ring *ring = adev->rings[i];
4778
51687759 4779 if (!ring || !ring->sched.thread)
0875dc9e 4780 continue;
5740682e 4781
c530b02f
JZ
4782 /* Clear job fences from the fence drv to avoid force_completion;
4783 * leave NULL and vm flush fences in the fence drv */
5c1e6fa4 4784 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4785
2f9d4084
ML
4786 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4787 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4788 }
d38ceaf9 4789
9e225fb9
AG
4790 amdgpu_fence_driver_isr_toggle(adev, false);
4791
ff99849b 4792 if (job && job->vm)
222b5f04
AG
4793 drm_sched_increase_karma(&job->base);
4794
04442bf7 4795 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b
LL
4796 /* If reset handler not implemented, continue; otherwise return */
4797 if (r == -ENOSYS)
4798 r = 0;
4799 else
04442bf7
LL
4800 return r;
4801
1d721ed6 4802 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4803 if (!amdgpu_sriov_vf(adev)) {
4804
4805 if (!need_full_reset)
4806 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4807
360cd081
LG
4808 if (!need_full_reset && amdgpu_gpu_recovery &&
4809 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4810 amdgpu_device_ip_pre_soft_reset(adev);
4811 r = amdgpu_device_ip_soft_reset(adev);
4812 amdgpu_device_ip_post_soft_reset(adev);
4813 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4814 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4815 need_full_reset = true;
4816 }
4817 }
4818
4819 if (need_full_reset)
4820 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4821 if (need_full_reset)
4822 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4823 else
4824 clear_bit(AMDGPU_NEED_FULL_RESET,
4825 &reset_context->flags);
26bc5340
AG
4826 }
4827
4828 return r;
4829}
4830
15fd09a0
SA
4831static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4832{
15fd09a0
SA
4833 int i;
4834
38a15ad9 4835 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4836
4837 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4838 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4839 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4840 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4841 }
4842
4843 return 0;
4844}
4845
3d8785f6
SA
4846#ifdef CONFIG_DEV_COREDUMP
4847static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4848 size_t count, void *data, size_t datalen)
4849{
4850 struct drm_printer p;
4851 struct amdgpu_device *adev = data;
4852 struct drm_print_iterator iter;
4853 int i;
4854
4855 iter.data = buffer;
4856 iter.offset = 0;
4857 iter.start = offset;
4858 iter.remain = count;
4859
4860 p = drm_coredump_printer(&iter);
4861
4862 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4863 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4864 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4865 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4866 if (adev->reset_task_info.pid)
4867 drm_printf(&p, "process_name: %s PID: %d\n",
4868 adev->reset_task_info.process_name,
4869 adev->reset_task_info.pid);
4870
4871 if (adev->reset_vram_lost)
4872 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4873 if (adev->num_regs) {
4874 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4875
4876 for (i = 0; i < adev->num_regs; i++)
4877 drm_printf(&p, "0x%08x: 0x%08x\n",
4878 adev->reset_dump_reg_list[i],
4879 adev->reset_dump_reg_value[i]);
4880 }
4881
4882 return count - iter.remain;
4883}
4884
4885static void amdgpu_devcoredump_free(void *data)
4886{
4887}
4888
4889static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4890{
4891 struct drm_device *dev = adev_to_drm(adev);
4892
4893 ktime_get_ts64(&adev->reset_time);
4894 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4895 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4896}
4897#endif
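/*
 * The device coredump produced above is plain text along these lines
 * (field values hypothetical):
 *
 *   **** AMDGPU Device Coredump ****
 *   kernel: 6.x.y
 *   module: amdgpu
 *   time: 1234.567890123
 *   process_name: foo PID: 42
 *   VRAM is lost due to GPU reset!
 *   AMDGPU register dumps:
 *   Offset: Value:
 *   0x0000abcd: 0x00000000
 *
 * It can typically be read from /sys/class/devcoredump/devcd*/data after a
 * reset.
 */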
4898
04442bf7
LL
4899int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4900 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4901{
4902 struct amdgpu_device *tmp_adev = NULL;
04442bf7 4903 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 4904 int r = 0;
f5c7e779 4905 bool gpu_reset_for_dev_remove = 0;
26bc5340 4906
04442bf7
LL
4907 /* Try reset handler method first */
4908 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4909 reset_list);
15fd09a0 4910 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
4911
4912 reset_context->reset_device_list = device_list_handle;
04442bf7 4913 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b
LL
4914 /* If reset handler not implemented, continue; otherwise return */
4915 if (r == -ENOSYS)
4916 r = 0;
4917 else
04442bf7
LL
4918 return r;
4919
4920 /* Reset handler not implemented, use the default method */
4921 need_full_reset =
4922 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4923 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4924
f5c7e779
YC
4925 gpu_reset_for_dev_remove =
4926 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4927 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4928
26bc5340 4929 /*
655ce9cb 4930 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
4931 * to allow proper link negotiation in FW (within 1 sec)
4932 */
7ac71382 4933 if (!skip_hw_reset && need_full_reset) {
655ce9cb 4934 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 4935 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4936 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 4937 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 4938 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4939 r = -EALREADY;
4940 } else
4941 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4942
041a62bc 4943 if (r) {
aac89168 4944 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4945 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4946 break;
ce316fa5
LM
4947 }
4948 }
4949
041a62bc
AG
4950 /* For XGMI wait for all resets to complete before proceed */
4951 if (!r) {
655ce9cb 4952 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
4953 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4954 flush_work(&tmp_adev->xgmi_reset_work);
4955 r = tmp_adev->asic_reset_res;
4956 if (r)
4957 break;
ce316fa5
LM
4958 }
4959 }
4960 }
ce316fa5 4961 }
26bc5340 4962
43c4d576 4963 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 4964 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 4965 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4966 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4967 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
4968 }
4969
00eaa571 4970 amdgpu_ras_intr_cleared();
43c4d576 4971 }
00eaa571 4972
f5c7e779
YC
4973 /* Since the mode1 reset affects base ip blocks, the
4974 * phase1 ip blocks need to be resumed. Otherwise there
4975 * will be a BIOS signature error and the psp bootloader
4976 * can't load kdb on the next amdgpu install.
4977 */
4978 if (gpu_reset_for_dev_remove) {
4979 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4980 amdgpu_device_ip_resume_phase1(tmp_adev);
4981
4982 goto end;
4983 }
4984
655ce9cb 4985 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
4986 if (need_full_reset) {
4987 /* post card */
e3c1b071 4988 r = amdgpu_device_asic_init(tmp_adev);
4989 if (r) {
aac89168 4990 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 4991 } else {
26bc5340 4992 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1
JZ
4993 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4994 if (r)
4995 goto out;
4996
26bc5340
AG
4997 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4998 if (r)
4999 goto out;
5000
5001 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
5002#ifdef CONFIG_DEV_COREDUMP
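			/* Record whether VRAM survived and which task's job
			 * triggered the reset, so the devcoredump captured
			 * below reflects this particular recovery.
			 */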
5003 tmp_adev->reset_vram_lost = vram_lost;
5004 memset(&tmp_adev->reset_task_info, 0,
5005 sizeof(tmp_adev->reset_task_info));
5006 if (reset_context->job && reset_context->job->vm)
5007 tmp_adev->reset_task_info =
5008 reset_context->job->vm->task_info;
5009 amdgpu_reset_capture_coredumpm(tmp_adev);
5010#endif
26bc5340 5011 if (vram_lost) {
77e7f829 5012 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5013 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5014 }
5015
26bc5340
AG
5016 r = amdgpu_device_fw_loading(tmp_adev);
5017 if (r)
5018 return r;
5019
5020 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5021 if (r)
5022 goto out;
5023
5024 if (vram_lost)
5025 amdgpu_device_fill_reset_magic(tmp_adev);
5026
fdafb359
EQ
5027 /*
5028 * Add this ASIC back as tracked since the reset has already
5029 * completed successfully.
5030 */
5031 amdgpu_register_gpu_instance(tmp_adev);
5032
04442bf7
LL
5033 if (!reset_context->hive &&
5034 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5035 amdgpu_xgmi_add_device(tmp_adev);
5036
7c04ca50 5037 r = amdgpu_device_ip_late_init(tmp_adev);
5038 if (r)
5039 goto out;
5040
087451f3 5041 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5042
e8fbaf03
GC
5043 /*
5044 * The GPU enters a bad state once the number of faulty pages
5045 * caught by ECC has reached the threshold, and RAS
5046 * recovery is scheduled next. So add one check
5047 * here to break recovery if it indeed exceeds the
5048 * bad page threshold, and remind the user to
5049 * retire this GPU or set a bigger
5050 * bad_page_threshold value to fix this once the
5051 * driver is probed again.
5052 */
11003c68 5053 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5054 /* must succeed. */
5055 amdgpu_ras_resume(tmp_adev);
5056 } else {
5057 r = -EINVAL;
5058 goto out;
5059 }
e79a04d5 5060
26bc5340 5061 /* Update PSP FW topology after reset */
04442bf7
LL
5062 if (reset_context->hive &&
5063 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5064 r = amdgpu_xgmi_update_topology(
5065 reset_context->hive, tmp_adev);
26bc5340
AG
5066 }
5067 }
5068
26bc5340
AG
5069out:
5070 if (!r) {
5071 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5072 r = amdgpu_ib_ring_tests(tmp_adev);
5073 if (r) {
5074 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5075 need_full_reset = true;
5076 r = -EAGAIN;
5077 goto end;
5078 }
5079 }
5080
5081 if (!r)
5082 r = amdgpu_device_recover_vram(tmp_adev);
5083 else
5084 tmp_adev->asic_reset_res = r;
5085 }
5086
5087end:
04442bf7
LL
5088 if (need_full_reset)
5089 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5090 else
5091 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5092 return r;
5093}
5094
e923be99 5095static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5096{
5740682e 5097
a3a09142
AD
5098 switch (amdgpu_asic_reset_method(adev)) {
5099 case AMD_RESET_METHOD_MODE1:
5100 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5101 break;
5102 case AMD_RESET_METHOD_MODE2:
5103 adev->mp1_state = PP_MP1_STATE_RESET;
5104 break;
5105 default:
5106 adev->mp1_state = PP_MP1_STATE_NONE;
5107 break;
5108 }
26bc5340 5109}
d38ceaf9 5110
e923be99 5111static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5112{
89041940 5113 amdgpu_vf_error_trans_all(adev);
a3a09142 5114 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5115}
5116
3f12acc8
EQ
5117static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5118{
5119 struct pci_dev *p = NULL;
5120
5121 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5122 adev->pdev->bus->number, 1);
5123 if (p) {
5124 pm_runtime_enable(&(p->dev));
5125 pm_runtime_resume(&(p->dev));
5126 }
b85e285e
YY
5127
5128 pci_dev_put(p);
3f12acc8
EQ
5129}
5130
5131static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5132{
5133 enum amd_reset_method reset_method;
5134 struct pci_dev *p = NULL;
5135 u64 expires;
5136
5137 /*
5138 * For now, only BACO and mode1 reset are confirmed
5139 * to suffer the audio issue when the audio device is not properly suspended.
5140 */
5141 reset_method = amdgpu_asic_reset_method(adev);
5142 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5143 (reset_method != AMD_RESET_METHOD_MODE1))
5144 return -EINVAL;
5145
5146 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5147 adev->pdev->bus->number, 1);
5148 if (!p)
5149 return -ENODEV;
5150
5151 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5152 if (!expires)
5153 /*
5154 * If we cannot get the audio device autosuspend delay,
5155 * a fixed 4s interval will be used. Since 3s is
5156 * the audio controller's default autosuspend delay setting,
5157 * the 4s used here is guaranteed to cover it.
5158 */
54b7feb9 5159 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5160
5161 while (!pm_runtime_status_suspended(&(p->dev))) {
5162 if (!pm_runtime_suspend(&(p->dev)))
5163 break;
5164
5165 if (expires < ktime_get_mono_fast_ns()) {
5166 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5167 pci_dev_put(p);
3f12acc8
EQ
5168 /* TODO: abort the succeeding gpu reset? */
5169 return -ETIMEDOUT;
5170 }
5171 }
5172
5173 pm_runtime_disable(&(p->dev));
5174
b85e285e 5175 pci_dev_put(p);
3f12acc8
EQ
5176 return 0;
5177}
5178
d193b12b 5179static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5180{
5181 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5182
5183#if defined(CONFIG_DEBUG_FS)
5184 if (!amdgpu_sriov_vf(adev))
5185 cancel_work(&adev->reset_work);
5186#endif
5187
5188 if (adev->kfd.dev)
5189 cancel_work(&adev->kfd.reset_work);
5190
5191 if (amdgpu_sriov_vf(adev))
5192 cancel_work(&adev->virt.flr_work);
5193
5194 if (con && adev->ras_enabled)
5195 cancel_work(&con->recovery_work);
5196
5197}
5198
26bc5340 5199/**
6e9c65f7 5200 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5201 *
982a820b 5202 * @adev: amdgpu_device pointer
26bc5340 5204 * @job: which job triggered the hang
80bd2de1 5204 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5205 *
5206 * Attempt to reset the GPU if it has hung (all asics).
5207 * Attempt to do a soft reset or full reset and reinitialize the ASIC.
5208 * Returns 0 for success or an error on failure.
5209 */
5210
cf727044 5211int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5212 struct amdgpu_job *job,
5213 struct amdgpu_reset_context *reset_context)
26bc5340 5214{
1d721ed6 5215 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5216 bool job_signaled = false;
26bc5340 5217 struct amdgpu_hive_info *hive = NULL;
26bc5340 5218 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5219 int i, r = 0;
bb5c7235 5220 bool need_emergency_restart = false;
3f12acc8 5221 bool audio_suspended = false;
f5c7e779
YC
5222 bool gpu_reset_for_dev_remove = false;
5223
5224 gpu_reset_for_dev_remove =
5225 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5226 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5227
6e3cd2a9 5228 /*
bb5c7235
WS
5229 * Special case: RAS triggered and full reset isn't supported
5230 */
5231 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5232
d5ea093e
AG
5233 /*
5234 * Flush RAM to disk so that after reboot
5235 * the user can read the log and see why the system rebooted.
5236 */
bb5c7235 5237 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5238 DRM_WARN("Emergency reboot.");
5239
5240 ksys_sync_helper();
5241 emergency_restart();
5242 }
5243
b823821f 5244 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5245 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5246
175ac6ec
ZL
5247 if (!amdgpu_sriov_vf(adev))
5248 hive = amdgpu_get_xgmi_hive(adev);
681260df 5249 if (hive)
53b3f8f4 5250 mutex_lock(&hive->hive_lock);
26bc5340 5251
f1549c09
LG
5252 reset_context->job = job;
5253 reset_context->hive = hive;
9e94d22c
EQ
5254 /*
5255 * Build list of devices to reset.
5256 * In case we are in XGMI hive mode, re-sort the device list
5257 * to put adev in the 1st position.
5258 */
5259 INIT_LIST_HEAD(&device_list);
175ac6ec 5260 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5261 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5262 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5263 if (gpu_reset_for_dev_remove && adev->shutdown)
5264 tmp_adev->shutdown = true;
5265 }
655ce9cb 5266 if (!list_is_first(&adev->reset_list, &device_list))
5267 list_rotate_to_front(&adev->reset_list, &device_list);
5268 device_list_handle = &device_list;
26bc5340 5269 } else {
655ce9cb 5270 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5271 device_list_handle = &device_list;
5272 }
5273
e923be99
AG
5274 /* We need to lock reset domain only once both for XGMI and single device */
5275 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5276 reset_list);
3675c2f2 5277 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5278
1d721ed6 5279 /* block all schedulers and reset given job's ring */
655ce9cb 5280 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5281
e923be99 5282 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5283
3f12acc8
EQ
5284 /*
5285 * Try to put the audio codec into suspend state
5286 * before the gpu reset starts.
5287 *
5288 * The power domain of the graphics device is
5289 * shared with the AZ power domain. Without this,
5290 * we may change the audio hardware from behind
5291 * the audio driver's back and trigger
5292 * some audio codec errors.
5293 */
5294 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5295 audio_suspended = true;
5296
9e94d22c
EQ
5297 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5298
52fb44cf
EQ
5299 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5300
c004d44e 5301 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5302 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5303
12ffa55d
AG
5304 /*
5305 * Mark these ASICs to be reset as untracked first,
5306 * and add them back after the reset has completed.
5307 */
5308 amdgpu_unregister_gpu_instance(tmp_adev);
5309
163d4cd2 5310 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5311
f1c1314b 5312 /* disable ras on ALL IPs */
bb5c7235 5313 if (!need_emergency_restart &&
b823821f 5314 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5315 amdgpu_ras_suspend(tmp_adev);
5316
1d721ed6
AG
5317 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5318 struct amdgpu_ring *ring = tmp_adev->rings[i];
5319
5320 if (!ring || !ring->sched.thread)
5321 continue;
5322
0b2d2c2e 5323 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5324
bb5c7235 5325 if (need_emergency_restart)
7c6e68c7 5326 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5327 }
8f8c80f4 5328 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5329 }
5330
bb5c7235 5331 if (need_emergency_restart)
7c6e68c7
AG
5332 goto skip_sched_resume;
5333
1d721ed6
AG
5334 /*
5335 * Must check guilty signal here since after this point all old
5336 * HW fences are force signaled.
5337 *
5338 * job->base holds a reference to parent fence
5339 */
f6a3f660 5340 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5341 job_signaled = true;
1d721ed6
AG
5342 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5343 goto skip_hw_reset;
5344 }
5345
26bc5340 5346retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5347 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5348 if (gpu_reset_for_dev_remove) {
5349 /* Workaround for ASICs that need to disable SMC first */
5350 amdgpu_device_smu_fini_early(tmp_adev);
5351 }
f1549c09 5352 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5353 /* TODO: Should we stop? */
5354 if (r) {
aac89168 5355 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5356 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5357 tmp_adev->asic_reset_res = r;
5358 }
247c7b0d
AG
5359
5360 /*
5361 * Drop all pending non-scheduler resets. Scheduler resets
5362 * were already dropped during drm_sched_stop.
5363 */
d193b12b 5364 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5365 }
5366
5367 /* Actual ASIC resets if needed.*/
4f30d920 5368 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5369 if (amdgpu_sriov_vf(adev)) {
5370 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5371 if (r)
5372 adev->asic_reset_res = r;
950d6425 5373
28606c4e
YC
5374 /* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so we need to resume RAS during reset */
5375 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
5376 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
950d6425 5377 amdgpu_ras_resume(adev);
26bc5340 5378 } else {
f1549c09 5379 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
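		/* amdgpu_do_asic_reset() returns -EAGAIN when the post-reset IB
		 * ring tests fail; in that case AMDGPU_NEED_FULL_RESET is left
		 * set and the whole pre-reset/reset sequence is retried.
		 */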
b98a1648 5380 if (r && r == -EAGAIN)
26bc5340 5381 goto retry;
f5c7e779
YC
5382
5383 if (!r && gpu_reset_for_dev_remove)
5384 goto recover_end;
26bc5340
AG
5385 }
5386
1d721ed6
AG
5387skip_hw_reset:
5388
26bc5340 5389 /* Post ASIC reset for all devs .*/
655ce9cb 5390 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5391
1d721ed6
AG
5392 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5393 struct amdgpu_ring *ring = tmp_adev->rings[i];
5394
5395 if (!ring || !ring->sched.thread)
5396 continue;
5397
6868a2c4 5398 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5399 }
5400
693073a0 5401 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
ed67f729
JX
5402 amdgpu_mes_self_test(tmp_adev);
5403
1053b9c9 5404 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
4a580877 5405 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
5406 }
5407
7258fa31
SK
5408 if (tmp_adev->asic_reset_res)
5409 r = tmp_adev->asic_reset_res;
5410
1d721ed6 5411 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5412
5413 if (r) {
5414 /* bad news, how to tell it to userspace ? */
12ffa55d 5415 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5416 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5417 } else {
12ffa55d 5418 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5419 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5420 DRM_WARN("smart shift update failed\n");
26bc5340 5421 }
7c6e68c7 5422 }
26bc5340 5423
7c6e68c7 5424skip_sched_resume:
655ce9cb 5425 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5426 /* unlock kfd: SRIOV would do it separately */
c004d44e 5427 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5428 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5429
5430 /* kfd_post_reset will do nothing if the kfd device is not initialized,
5431 * so we need to bring up kfd here if it was not initialized before
5432 */
5433 if (!adev->kfd.init_complete)
5434 amdgpu_amdkfd_device_init(adev);
5435
3f12acc8
EQ
5436 if (audio_suspended)
5437 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5438
5439 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5440
5441 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5442 }
5443
f5c7e779 5444recover_end:
e923be99
AG
5445 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5446 reset_list);
5447 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5448
9e94d22c 5449 if (hive) {
9e94d22c 5450 mutex_unlock(&hive->hive_lock);
d95e8e97 5451 amdgpu_put_xgmi_hive(hive);
9e94d22c 5452 }
26bc5340 5453
f287a3c5 5454 if (r)
26bc5340 5455 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5456
5457 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5458 return r;
5459}
5460
e3ecdffa
AD
5461/**
5462 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5463 *
5464 * @adev: amdgpu_device pointer
5465 *
5466 * Fetches and stores in the driver the PCIE capabilities (gen speed
5467 * and lanes) of the slot the device is in. Handles APUs and
5468 * virtualized environments where PCIE config space may not be available.
5469 */
5494d864 5470static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5471{
5d9a6330 5472 struct pci_dev *pdev;
c5313457
HK
5473 enum pci_bus_speed speed_cap, platform_speed_cap;
5474 enum pcie_link_width platform_link_width;
d0dd7f0c 5475
cd474ba0
AD
5476 if (amdgpu_pcie_gen_cap)
5477 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5478
cd474ba0
AD
5479 if (amdgpu_pcie_lane_cap)
5480 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5481
cd474ba0
AD
5482 /* covers APUs as well */
5483 if (pci_is_root_bus(adev->pdev->bus)) {
5484 if (adev->pm.pcie_gen_mask == 0)
5485 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5486 if (adev->pm.pcie_mlw_mask == 0)
5487 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5488 return;
cd474ba0 5489 }
d0dd7f0c 5490
c5313457
HK
5491 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5492 return;
5493
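	/* Query the platform's link caps from the PCI core, then encode both
	 * the ASIC's and the platform's supported speeds/widths as CAIL_* mask
	 * bits in adev->pm for the power-management code to consume.
	 */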
dbaa922b
AD
5494 pcie_bandwidth_available(adev->pdev, NULL,
5495 &platform_speed_cap, &platform_link_width);
c5313457 5496
cd474ba0 5497 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5498 /* asic caps */
5499 pdev = adev->pdev;
5500 speed_cap = pcie_get_speed_cap(pdev);
5501 if (speed_cap == PCI_SPEED_UNKNOWN) {
5502 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5503 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5504 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5505 } else {
2b3a1f51
FX
5506 if (speed_cap == PCIE_SPEED_32_0GT)
5507 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5508 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5509 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5510 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5511 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5512 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5513 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5514 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5515 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5516 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5517 else if (speed_cap == PCIE_SPEED_8_0GT)
5518 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5519 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5520 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5521 else if (speed_cap == PCIE_SPEED_5_0GT)
5522 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5523 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5524 else
5525 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5526 }
5527 /* platform caps */
c5313457 5528 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5529 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5530 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5531 } else {
2b3a1f51
FX
5532 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5533 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5534 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5535 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5536 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5537 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5538 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5539 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5540 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5541 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5542 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5543 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5544 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5545 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5546 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5547 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5548 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5549 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5550 else
5551 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5552
cd474ba0
AD
5553 }
5554 }
5555 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5556 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5557 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5558 } else {
c5313457 5559 switch (platform_link_width) {
5d9a6330 5560 case PCIE_LNK_X32:
cd474ba0
AD
5561 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5562 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5563 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5564 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5565 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5566 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5567 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5568 break;
5d9a6330 5569 case PCIE_LNK_X16:
cd474ba0
AD
5570 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5571 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5572 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5573 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5574 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5575 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5576 break;
5d9a6330 5577 case PCIE_LNK_X12:
cd474ba0
AD
5578 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5579 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5580 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5581 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5582 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5583 break;
5d9a6330 5584 case PCIE_LNK_X8:
cd474ba0
AD
5585 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5586 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5587 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5588 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5589 break;
5d9a6330 5590 case PCIE_LNK_X4:
cd474ba0
AD
5591 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5592 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5593 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5594 break;
5d9a6330 5595 case PCIE_LNK_X2:
cd474ba0
AD
5596 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5597 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5598 break;
5d9a6330 5599 case PCIE_LNK_X1:
cd474ba0
AD
5600 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5601 break;
5602 default:
5603 break;
5604 }
d0dd7f0c
AD
5605 }
5606 }
5607}
d38ceaf9 5608
08a2fd23
RE
5609/**
5610 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5611 *
5612 * @adev: amdgpu_device pointer
5613 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5614 *
5615 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5616 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5617 * @peer_adev.
5618 */
5619bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5620 struct amdgpu_device *peer_adev)
5621{
5622#ifdef CONFIG_HSA_AMD_P2P
5623 uint64_t address_mask = peer_adev->dev->dma_mask ?
5624 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5625 resource_size_t aper_limit =
5626 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5627 bool p2p_access =
5628 !adev->gmc.xgmi.connected_to_cpu &&
5629 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5630
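	/* Peer DMA access additionally requires that all of VRAM is CPU
	 * visible (full/large BAR) and that the whole aperture falls within
	 * the peer's DMA addressing range.
	 */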
5631 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5632 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5633 !(adev->gmc.aper_base & address_mask ||
5634 aper_limit & address_mask));
5635#else
5636 return false;
5637#endif
5638}
5639
361dbd01
AD
5640int amdgpu_device_baco_enter(struct drm_device *dev)
5641{
1348969a 5642 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5643 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5644
6ab68650 5645 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5646 return -ENOTSUPP;
5647
8ab0d6f0 5648 if (ras && adev->ras_enabled &&
acdae216 5649 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5650 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5651
9530273e 5652 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5653}
5654
5655int amdgpu_device_baco_exit(struct drm_device *dev)
5656{
1348969a 5657 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5658 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5659 int ret = 0;
361dbd01 5660
6ab68650 5661 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5662 return -ENOTSUPP;
5663
9530273e
EQ
5664 ret = amdgpu_dpm_baco_exit(adev);
5665 if (ret)
5666 return ret;
7a22677b 5667
8ab0d6f0 5668 if (ras && adev->ras_enabled &&
acdae216 5669 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5670 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5671
1bece222
CL
5672 if (amdgpu_passthrough(adev) &&
5673 adev->nbio.funcs->clear_doorbell_interrupt)
5674 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5675
7a22677b 5676 return 0;
361dbd01 5677}
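
/*
 * Minimal usage sketch (hypothetical helper, not part of the driver):
 * cycling a BACO-capable dGPU through BACO entry and exit. Assumes @dev
 * is the device's drm_device and that the caller handles ordering with
 * any concurrent reset/suspend activity.
 */
static int example_baco_cycle(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;

	/* ... the ASIC now sits in BACO (bus active, chip off) ... */

	return amdgpu_device_baco_exit(dev);
}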
c9a6b82f
AG
5678
5679/**
5680 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5681 * @pdev: PCI device struct
5682 * @state: PCI channel state
5683 *
5684 * Description: Called when a PCI error is detected.
5685 *
5686 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5687 */
5688pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5689{
5690 struct drm_device *dev = pci_get_drvdata(pdev);
5691 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5692 int i;
c9a6b82f
AG
5693
5694 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5695
6894305c
AG
5696 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5697 DRM_WARN("No support for XGMI hive yet...");
5698 return PCI_ERS_RESULT_DISCONNECT;
5699 }
5700
e17e27f9
GC
5701 adev->pci_channel_state = state;
5702
c9a6b82f
AG
5703 switch (state) {
5704 case pci_channel_io_normal:
5705 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5706 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5707 case pci_channel_io_frozen:
5708 /*
d0fb18b5 5709 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5710 * to GPU during PCI error recovery
5711 */
3675c2f2 5712 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5713 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5714
5715 /*
5716 * Block any work scheduling as we do for regular GPU reset
5717 * for the duration of the recovery
5718 */
5719 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5720 struct amdgpu_ring *ring = adev->rings[i];
5721
5722 if (!ring || !ring->sched.thread)
5723 continue;
5724
5725 drm_sched_stop(&ring->sched, NULL);
5726 }
8f8c80f4 5727 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5728 return PCI_ERS_RESULT_NEED_RESET;
5729 case pci_channel_io_perm_failure:
5730 /* Permanent error, prepare for device removal */
5731 return PCI_ERS_RESULT_DISCONNECT;
5732 }
5733
5734 return PCI_ERS_RESULT_NEED_RESET;
5735}
5736
5737/**
5738 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5739 * @pdev: pointer to PCI device
5740 */
5741pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5742{
5743
5744 DRM_INFO("PCI error: mmio enabled callback!!\n");
5745
5746 /* TODO - dump whatever for debugging purposes */
5747
5748 /* This is called only if amdgpu_pci_error_detected returns
5749 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5750 * works, so there is no need to reset the slot.
5751 */
5752
5753 return PCI_ERS_RESULT_RECOVERED;
5754}
5755
5756/**
5757 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5758 * @pdev: PCI device struct
5759 *
5760 * Description: This routine is called by the pci error recovery
5761 * code after the PCI slot has been reset, just before we
5762 * should resume normal operations.
5763 */
5764pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5765{
5766 struct drm_device *dev = pci_get_drvdata(pdev);
5767 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5768 int r, i;
04442bf7 5769 struct amdgpu_reset_context reset_context;
362c7b91 5770 u32 memsize;
7ac71382 5771 struct list_head device_list;
c9a6b82f
AG
5772
5773 DRM_INFO("PCI error: slot reset callback!!\n");
5774
04442bf7
LL
5775 memset(&reset_context, 0, sizeof(reset_context));
5776
7ac71382 5777 INIT_LIST_HEAD(&device_list);
655ce9cb 5778 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5779
362c7b91
AG
5780 /* wait for asic to come out of reset */
5781 msleep(500);
5782
7ac71382 5783 /* Restore PCI confspace */
c1dd4aa6 5784 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5785
362c7b91
AG
5786 /* confirm ASIC came out of reset */
5787 for (i = 0; i < adev->usec_timeout; i++) {
5788 memsize = amdgpu_asic_get_config_memsize(adev);
5789
5790 if (memsize != 0xffffffff)
5791 break;
5792 udelay(1);
5793 }
5794 if (memsize == 0xffffffff) {
5795 r = -ETIME;
5796 goto out;
5797 }
5798
04442bf7
LL
5799 reset_context.method = AMD_RESET_METHOD_NONE;
5800 reset_context.reset_req_dev = adev;
5801 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5802 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5803
7afefb81 5804 adev->no_hw_access = true;
04442bf7 5805 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5806 adev->no_hw_access = false;
c9a6b82f
AG
5807 if (r)
5808 goto out;
5809
04442bf7 5810 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5811
5812out:
c9a6b82f 5813 if (!r) {
c1dd4aa6
AG
5814 if (amdgpu_device_cache_pci_state(adev->pdev))
5815 pci_restore_state(adev->pdev);
5816
c9a6b82f
AG
5817 DRM_INFO("PCIe error recovery succeeded\n");
5818 } else {
5819 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5820 amdgpu_device_unset_mp1_state(adev);
5821 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5822 }
5823
5824 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5825}
5826
5827/**
5828 * amdgpu_pci_resume() - resume normal ops after PCI reset
5829 * @pdev: pointer to PCI device
5830 *
5831 * Called when the error recovery driver tells us that it's
505199a3 5832 * OK to resume normal operation.
c9a6b82f
AG
5833 */
5834void amdgpu_pci_resume(struct pci_dev *pdev)
5835{
5836 struct drm_device *dev = pci_get_drvdata(pdev);
5837 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5838 int i;
c9a6b82f 5839
c9a6b82f
AG
5840
5841 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5842
e17e27f9
GC
5843 /* Only continue execution for the case of pci_channel_io_frozen */
5844 if (adev->pci_channel_state != pci_channel_io_frozen)
5845 return;
5846
acd89fca
AG
5847 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5848 struct amdgpu_ring *ring = adev->rings[i];
5849
5850 if (!ring || !ring->sched.thread)
5851 continue;
5852
acd89fca
AG
5853 drm_sched_start(&ring->sched, true);
5854 }
5855
e923be99
AG
5856 amdgpu_device_unset_mp1_state(adev);
5857 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5858}
c1dd4aa6
AG
5859
5860bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5861{
5862 struct drm_device *dev = pci_get_drvdata(pdev);
5863 struct amdgpu_device *adev = drm_to_adev(dev);
5864 int r;
5865
5866 r = pci_save_state(pdev);
5867 if (!r) {
5868 kfree(adev->pci_state);
5869
5870 adev->pci_state = pci_store_saved_state(pdev);
5871
5872 if (!adev->pci_state) {
5873 DRM_ERROR("Failed to store PCI saved state");
5874 return false;
5875 }
5876 } else {
5877 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5878 return false;
5879 }
5880
5881 return true;
5882}
5883
5884bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5885{
5886 struct drm_device *dev = pci_get_drvdata(pdev);
5887 struct amdgpu_device *adev = drm_to_adev(dev);
5888 int r;
5889
5890 if (!adev->pci_state)
5891 return false;
5892
5893 r = pci_load_saved_state(pdev, adev->pci_state);
5894
5895 if (!r) {
5896 pci_restore_state(pdev);
5897 } else {
5898 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5899 return false;
5900 }
5901
5902 return true;
5903}
5904
810085dd
EH
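/* HDP flush/invalidate is only required when the host reaches VRAM through
 * the PCIe HDP aperture; APUs (unless passed through to a guest) and GPUs
 * whose memory is directly connected to the CPU can skip it.
 */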
5905void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5906 struct amdgpu_ring *ring)
5907{
5908#ifdef CONFIG_X86_64
b818a5d3 5909 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5910 return;
5911#endif
5912 if (adev->gmc.xgmi.connected_to_cpu)
5913 return;
5914
5915 if (ring && ring->funcs->emit_hdp_flush)
5916 amdgpu_ring_emit_hdp_flush(ring);
5917 else
5918 amdgpu_asic_flush_hdp(adev, ring);
5919}
c1dd4aa6 5920
810085dd
EH
5921void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5922 struct amdgpu_ring *ring)
5923{
5924#ifdef CONFIG_X86_64
b818a5d3 5925 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5926 return;
5927#endif
5928 if (adev->gmc.xgmi.connected_to_cpu)
5929 return;
c1dd4aa6 5930
810085dd
EH
5931 amdgpu_asic_invalidate_hdp(adev, ring);
5932}
34f3a4a9 5933
89a7a870
AG
5934int amdgpu_in_reset(struct amdgpu_device *adev)
5935{
5936 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
5937}
5938
34f3a4a9
LY
5939/**
5940 * amdgpu_device_halt() - bring hardware to some kind of halt state
5941 *
5942 * @adev: amdgpu_device pointer
5943 *
5944 * Bring hardware to some kind of halt state so that no one can touch it
5945 * any more. It helps to maintain the error context when an error occurs.
5946 * Compared to a simple hang, the system will stay stable at least for SSH
5947 * access. Then it should be trivial to inspect the hardware state and
5948 * see what's going on. Implemented as follows:
5949 *
5950 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
5951 * clears all CPU mappings to device, disallows remappings through page faults
5952 * 2. amdgpu_irq_disable_all() disables all interrupts
5953 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
5954 * 4. set adev->no_hw_access to avoid potential crashes after step 5
5955 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5956 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5957 * flush any in flight DMA operations
5958 */
5959void amdgpu_device_halt(struct amdgpu_device *adev)
5960{
5961 struct pci_dev *pdev = adev->pdev;
e0f943b4 5962 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9
LY
5963
5964 drm_dev_unplug(ddev);
5965
5966 amdgpu_irq_disable_all(adev);
5967
5968 amdgpu_fence_driver_hw_fini(adev);
5969
5970 adev->no_hw_access = true;
5971
5972 amdgpu_device_unmap_mmio(adev);
5973
5974 pci_disable_device(pdev);
5975 pci_wait_for_pending_transaction(pdev);
5976}
86700a40
XD
5977
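/* PCIe port registers are reached indirectly: the NBIO-provided index
 * register selects the target register and the data register is then read
 * or written; adev->pcie_idx_lock serializes use of the index/data pair.
 */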
5978u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5979 u32 reg)
5980{
5981 unsigned long flags, address, data;
5982 u32 r;
5983
5984 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5985 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5986
5987 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5988 WREG32(address, reg * 4);
5989 (void)RREG32(address);
5990 r = RREG32(data);
5991 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5992 return r;
5993}
5994
5995void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5996 u32 reg, u32 v)
5997{
5998 unsigned long flags, address, data;
5999
6000 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6001 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6002
6003 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6004 WREG32(address, reg * 4);
6005 (void)RREG32(address);
6006 WREG32(data, v);
6007 (void)RREG32(data);
6008 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6009}
68ce8b24
CK
6010}
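
/*
 * Usage sketch (illustrative only): a hypothetical read-modify-write of a
 * PCIe port register through the indirect helpers above. EXAMPLE_PCIE_PORT_REG
 * is a made-up offset used purely for illustration.
 */
static void example_pcie_port_rmw(struct amdgpu_device *adev, u32 set_mask)
{
	u32 val;

	val = amdgpu_device_pcie_port_rreg(adev, EXAMPLE_PCIE_PORT_REG);
	val |= set_mask;
	amdgpu_device_pcie_port_wreg(adev, EXAMPLE_PCIE_PORT_REG, val);
}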
6011/**
6012 * amdgpu_device_switch_gang - switch to a new gang
6013 * @adev: amdgpu_device pointer
6014 * @gang: the gang to switch to
6015 *
6016 * Try to switch to a new gang.
6017 * Returns: NULL if we switched to the new gang or a reference to the current
6018 * gang leader.
6019 */
6020struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6021 struct dma_fence *gang)
6022{
6023 struct dma_fence *old = NULL;
6024
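	/* Loop until we either see that @gang is already installed, find a
	 * previous gang leader that has not signaled yet (in which case the
	 * caller must wait on it), or successfully swap @gang in via cmpxchg().
	 */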
6025 do {
6026 dma_fence_put(old);
6027 rcu_read_lock();
6028 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6029 rcu_read_unlock();
6030
6031 if (old == gang)
6032 break;
6033
6034 if (!dma_fence_is_signaled(old))
6035 return old;
6036
6037 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6038 old, gang) != old);
6039
6040 dma_fence_put(old);
6041 return NULL;
6042}
220c8cc8
AD
6043
6044bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6045{
6046 switch (adev->asic_type) {
6047#ifdef CONFIG_DRM_AMDGPU_SI
6048 case CHIP_HAINAN:
6049#endif
6050 case CHIP_TOPAZ:
6051 /* chips with no display hardware */
6052 return false;
6053#ifdef CONFIG_DRM_AMDGPU_SI
6054 case CHIP_TAHITI:
6055 case CHIP_PITCAIRN:
6056 case CHIP_VERDE:
6057 case CHIP_OLAND:
6058#endif
6059#ifdef CONFIG_DRM_AMDGPU_CIK
6060 case CHIP_BONAIRE:
6061 case CHIP_HAWAII:
6062 case CHIP_KAVERI:
6063 case CHIP_KABINI:
6064 case CHIP_MULLINS:
6065#endif
6066 case CHIP_TONGA:
6067 case CHIP_FIJI:
6068 case CHIP_POLARIS10:
6069 case CHIP_POLARIS11:
6070 case CHIP_POLARIS12:
6071 case CHIP_VEGAM:
6072 case CHIP_CARRIZO:
6073 case CHIP_STONEY:
6074 /* chips with display hardware */
6075 return true;
6076 default:
6077 /* IP discovery */
6078 if (!adev->ip_versions[DCE_HWIP][0] ||
6079 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6080 return false;
6081 return true;
6082 }
6083}