drm/amdgpu: enable ras for mp0 v13_0_10 on SRIOV
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
3d8785f6
SA
35#include <linux/devcoredump.h>
36#include <generated/utsrelease.h>
08a2fd23 37#include <linux/pci-p2pdma.h>
d37a3929 38#include <linux/apple-gmux.h>
fdf2f6c5 39
b7cdb41e 40#include <drm/drm_aperture.h>
4562236b 41#include <drm/drm_atomic_helper.h>
973ad627 42#include <drm/drm_crtc_helper.h>
45b64fd9 43#include <drm/drm_fb_helper.h>
fcd70cd3 44#include <drm/drm_probe_helper.h>
d38ceaf9
AD
45#include <drm/amdgpu_drm.h>
46#include <linux/vgaarb.h>
47#include <linux/vga_switcheroo.h>
48#include <linux/efi.h>
49#include "amdgpu.h"
f4b373f4 50#include "amdgpu_trace.h"
d38ceaf9
AD
51#include "amdgpu_i2c.h"
52#include "atom.h"
53#include "amdgpu_atombios.h"
a5bde2f9 54#include "amdgpu_atomfirmware.h"
d0dd7f0c 55#include "amd_pcie.h"
33f34802
KW
56#ifdef CONFIG_DRM_AMDGPU_SI
57#include "si.h"
58#endif
a2e73f56
AD
59#ifdef CONFIG_DRM_AMDGPU_CIK
60#include "cik.h"
61#endif
aaa36a97 62#include "vi.h"
460826e6 63#include "soc15.h"
0a5b8c7b 64#include "nv.h"
d38ceaf9 65#include "bif/bif_4_1_d.h"
bec86378 66#include <linux/firmware.h>
89041940 67#include "amdgpu_vf_error.h"
d38ceaf9 68
ba997709 69#include "amdgpu_amdkfd.h"
d2f52ac8 70#include "amdgpu_pm.h"
d38ceaf9 71
5183411b 72#include "amdgpu_xgmi.h"
c030f2e4 73#include "amdgpu_ras.h"
9c7c85f7 74#include "amdgpu_pmu.h"
bd607166 75#include "amdgpu_fru_eeprom.h"
04442bf7 76#include "amdgpu_reset.h"
5183411b 77
d5ea093e 78#include <linux/suspend.h>
c6a6e2db 79#include <drm/task_barrier.h>
3f12acc8 80#include <linux/pm_runtime.h>
d5ea093e 81
f89f8c6b
AG
82#include <drm/drm_drv.h>
83
3ad5dcfe
KHF
84#if IS_ENABLED(CONFIG_X86)
85#include <asm/intel-family.h>
86#endif
87
e2a75f88 88MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 89MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 90MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 91MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 92MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 93MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 94MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 95
2dc80b00 96#define AMDGPU_RESUME_MS 2000
7258fa31
SK
97#define AMDGPU_MAX_RETRY_LIMIT 2
98#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 99
b7cdb41e
ML
100static const struct drm_driver amdgpu_kms_driver;
101
050091ab 102const char *amdgpu_asic_name[] = {
da69c161
KW
103 "TAHITI",
104 "PITCAIRN",
105 "VERDE",
106 "OLAND",
107 "HAINAN",
d38ceaf9
AD
108 "BONAIRE",
109 "KAVERI",
110 "KABINI",
111 "HAWAII",
112 "MULLINS",
113 "TOPAZ",
114 "TONGA",
48299f95 115 "FIJI",
d38ceaf9 116 "CARRIZO",
139f4917 117 "STONEY",
2cc0c0b5
FC
118 "POLARIS10",
119 "POLARIS11",
c4642a47 120 "POLARIS12",
48ff108d 121 "VEGAM",
d4196f01 122 "VEGA10",
8fab806a 123 "VEGA12",
956fcddc 124 "VEGA20",
2ca8a5d2 125 "RAVEN",
d6c3b24e 126 "ARCTURUS",
1eee4228 127 "RENOIR",
d46b417a 128 "ALDEBARAN",
852a6626 129 "NAVI10",
d0f56dc2 130 "CYAN_SKILLFISH",
87dbad02 131 "NAVI14",
9802f5d7 132 "NAVI12",
ccaf72d3 133 "SIENNA_CICHLID",
ddd8fbe7 134 "NAVY_FLOUNDER",
4f1e9a76 135 "VANGOGH",
a2468e04 136 "DIMGREY_CAVEFISH",
6f169591 137 "BEIGE_GOBY",
ee9236b7 138 "YELLOW_CARP",
3ae695d6 139 "IP DISCOVERY",
d38ceaf9
AD
140 "LAST",
141};
142
dcea6e65
KR
143/**
144 * DOC: pcie_replay_count
145 *
146 * The amdgpu driver provides a sysfs API for reporting the total number
147 * of PCIe replays (NAKs)
148 * The file pcie_replay_count is used for this and returns the total
149 * number of replays as a sum of the NAKs generated and NAKs received
150 */
151
152static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 156 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
157 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158
36000c7a 159 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
160}
161
162static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
163 amdgpu_device_get_pcie_replay_count, NULL);
164
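/*
 * Editor's note: illustrative sketch only, not part of the driver source.
 * A minimal userspace reader for the attribute defined above. The sysfs
 * path is an assumption and varies per card (commonly
 * /sys/class/drm/card0/device/pcie_replay_count).
 */
#if 0	/* example only, not compiled with the driver */
#include <stdio.h>

int main(void)
{
	unsigned long long cnt;
	FILE *f = fopen("/sys/class/drm/card0/device/pcie_replay_count", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%llu", &cnt) == 1)
		printf("PCIe replay count: %llu\n", cnt);
	fclose(f);
	return 0;
}
#endif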
5494d864
AD
165static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
166
bd607166
KR
167/**
168 * DOC: product_name
169 *
170 * The amdgpu driver provides a sysfs API for reporting the product name
171 * for the device
2c496a6c 172 * The file product_name is used for this and returns the product name
bd607166
KR
173 * as returned from the FRU.
174 * NOTE: This is only available for certain server cards
175 */
176
177static ssize_t amdgpu_device_get_product_name(struct device *dev,
178 struct device_attribute *attr, char *buf)
179{
180 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 181 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 182
36000c7a 183 return sysfs_emit(buf, "%s\n", adev->product_name);
bd607166
KR
184}
185
186static DEVICE_ATTR(product_name, S_IRUGO,
187 amdgpu_device_get_product_name, NULL);
188
189/**
190 * DOC: product_number
191 *
192 * The amdgpu driver provides a sysfs API for reporting the part number
193 * for the device
2c496a6c 194 * The file product_number is used for this and returns the part number
bd607166
KR
195 * as returned from the FRU.
196 * NOTE: This is only available for certain server cards
197 */
198
199static ssize_t amdgpu_device_get_product_number(struct device *dev,
200 struct device_attribute *attr, char *buf)
201{
202 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 203 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 204
36000c7a 205 return sysfs_emit(buf, "%s\n", adev->product_number);
bd607166
KR
206}
207
208static DEVICE_ATTR(product_number, S_IRUGO,
209 amdgpu_device_get_product_number, NULL);
210
211/**
212 * DOC: serial_number
213 *
214 * The amdgpu driver provides a sysfs API for reporting the serial number
215 * for the device
216 * The file serial_number is used for this and returns the serial number
217 * as returned from the FRU.
218 * NOTE: This is only available for certain server cards
219 */
220
221static ssize_t amdgpu_device_get_serial_number(struct device *dev,
222 struct device_attribute *attr, char *buf)
223{
224 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 225 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 226
36000c7a 227 return sysfs_emit(buf, "%s\n", adev->serial);
bd607166
KR
228}
229
230static DEVICE_ATTR(serial_number, S_IRUGO,
231 amdgpu_device_get_serial_number, NULL);
232
fd496ca8 233/**
b98c6299 234 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
235 *
236 * @dev: drm_device pointer
237 *
b98c6299 238 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
239 * otherwise return false.
240 */
b98c6299 241bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
242{
243 struct amdgpu_device *adev = drm_to_adev(dev);
244
b98c6299 245 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
246 return true;
247 return false;
248}
249
e3ecdffa 250/**
0330b848 251 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
252 *
253 * @dev: drm_device pointer
254 *
b98c6299 255 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
256 * otherwise return false.
257 */
31af062a 258bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 259{
1348969a 260 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 261
b98c6299
AD
262 if (adev->has_pr3 ||
263 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
264 return true;
265 return false;
266}
267
a69cba42
AD
268/**
269 * amdgpu_device_supports_baco - Does the device support BACO
270 *
271 * @dev: drm_device pointer
272 *
 273 * Returns true if the device supports BACO,
274 * otherwise return false.
275 */
276bool amdgpu_device_supports_baco(struct drm_device *dev)
277{
1348969a 278 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
279
280 return amdgpu_asic_supports_baco(adev);
281}
282
3fa8f89d
S
283/**
284 * amdgpu_device_supports_smart_shift - Is the device dGPU with
285 * smart shift support
286 *
287 * @dev: drm_device pointer
288 *
289 * Returns true if the device is a dGPU with Smart Shift support,
290 * otherwise returns false.
291 */
292bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
293{
294 return (amdgpu_device_supports_boco(dev) &&
295 amdgpu_acpi_is_power_shift_control_supported());
296}
297
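/*
 * Editor's note: illustrative sketch only, not part of the driver source.
 * It shows how the helpers above can be combined to choose a runtime
 * power-management strategy; the enum and the ordering are assumptions for
 * illustration, not the driver's actual selection logic.
 */
#if 0	/* example only */
enum example_rpm_mode { EX_RPM_NONE, EX_RPM_PX, EX_RPM_BOCO, EX_RPM_BACO };

static enum example_rpm_mode example_pick_rpm_mode(struct drm_device *dev)
{
	if (amdgpu_device_supports_px(dev))
		return EX_RPM_PX;	/* ATPX-controlled dGPU */
	if (amdgpu_device_supports_boco(dev))
		return EX_RPM_BOCO;	/* ACPI power resources (_PR3) */
	if (amdgpu_device_supports_baco(dev))
		return EX_RPM_BACO;	/* bus active, chip off */
	return EX_RPM_NONE;
}
#endif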
6e3cd2a9
MCC
298/*
299 * VRAM access helper functions
300 */
301
e35e2b11 302/**
048af66b 303 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
304 *
305 * @adev: amdgpu_device pointer
306 * @pos: offset of the buffer in vram
307 * @buf: virtual address of the buffer in system memory
 308 * @size: read/write size, sizeof(@buf) must be >= @size
309 * @write: true - write to vram, otherwise - read from vram
310 */
048af66b
KW
311void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
312 void *buf, size_t size, bool write)
e35e2b11 313{
e35e2b11 314 unsigned long flags;
048af66b
KW
315 uint32_t hi = ~0, tmp = 0;
316 uint32_t *data = buf;
ce05ac56 317 uint64_t last;
f89f8c6b 318 int idx;
ce05ac56 319
c58a863b 320 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 321 return;
9d11eb0d 322
048af66b
KW
323 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
324
325 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
326 for (last = pos + size; pos < last; pos += 4) {
327 tmp = pos >> 31;
328
329 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
330 if (tmp != hi) {
331 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
332 hi = tmp;
333 }
334 if (write)
335 WREG32_NO_KIQ(mmMM_DATA, *data++);
336 else
337 *data++ = RREG32_NO_KIQ(mmMM_DATA);
338 }
339
340 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
341 drm_dev_exit(idx);
342}
343
344/**
bbe04dec 345 * amdgpu_device_aper_access - access vram by vram aperture
048af66b
KW
346 *
347 * @adev: amdgpu_device pointer
348 * @pos: offset of the buffer in vram
349 * @buf: virtual address of the buffer in system memory
 350 * @size: read/write size, sizeof(@buf) must be >= @size
351 * @write: true - write to vram, otherwise - read from vram
352 *
 353 * Returns the number of bytes transferred.
354 */
355size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
356 void *buf, size_t size, bool write)
357{
9d11eb0d 358#ifdef CONFIG_64BIT
048af66b
KW
359 void __iomem *addr;
360 size_t count = 0;
361 uint64_t last;
362
363 if (!adev->mman.aper_base_kaddr)
364 return 0;
365
9d11eb0d
CK
366 last = min(pos + size, adev->gmc.visible_vram_size);
367 if (last > pos) {
048af66b
KW
368 addr = adev->mman.aper_base_kaddr + pos;
369 count = last - pos;
9d11eb0d
CK
370
371 if (write) {
372 memcpy_toio(addr, buf, count);
373 mb();
810085dd 374 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 375 } else {
810085dd 376 amdgpu_device_invalidate_hdp(adev, NULL);
9d11eb0d
CK
377 mb();
378 memcpy_fromio(buf, addr, count);
379 }
380
9d11eb0d 381 }
048af66b
KW
382
383 return count;
384#else
385 return 0;
9d11eb0d 386#endif
048af66b 387}
9d11eb0d 388
048af66b
KW
389/**
390 * amdgpu_device_vram_access - read/write a buffer in vram
391 *
392 * @adev: amdgpu_device pointer
393 * @pos: offset of the buffer in vram
394 * @buf: virtual address of the buffer in system memory
 395 * @size: read/write size, sizeof(@buf) must be >= @size
396 * @write: true - write to vram, otherwise - read from vram
397 */
398void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
399 void *buf, size_t size, bool write)
400{
401 size_t count;
e35e2b11 402
048af66b
KW
 403 /* try using the vram aperture to access vram first */
404 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
405 size -= count;
406 if (size) {
 407 /* use MM access for the rest of vram */
408 pos += count;
409 buf += count;
410 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
411 }
412}
413
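/*
 * Editor's note: illustrative sketch only, not part of the driver source.
 * A hedged usage example for amdgpu_device_vram_access(): read the first
 * dword of VRAM into a local buffer. Position and size must stay dword
 * aligned for the MM_INDEX/MM_DATA fallback path above.
 */
#if 0	/* example only */
static void example_peek_vram(struct amdgpu_device *adev)
{
	uint32_t value = 0;

	/* read 4 bytes at VRAM offset 0; write=false selects the read path */
	amdgpu_device_vram_access(adev, 0, &value, sizeof(value), false);
	dev_info(adev->dev, "VRAM[0] = 0x%08x\n", value);
}
#endif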
d38ceaf9 414/*
f7ee1874 415 * register access helper functions.
d38ceaf9 416 */
56b53c0b
DL
417
418/* Check if hw access should be skipped because of hotplug or device error */
419bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
420{
7afefb81 421 if (adev->no_hw_access)
56b53c0b
DL
422 return true;
423
424#ifdef CONFIG_LOCKDEP
425 /*
426 * This is a bit complicated to understand, so worth a comment. What we assert
427 * here is that the GPU reset is not running on another thread in parallel.
428 *
 429 * For this we trylock the read side of the reset semaphore; if that succeeds,
 430 * we know that the reset is not running in parallel.
431 *
432 * If the trylock fails we assert that we are either already holding the read
433 * side of the lock or are the reset thread itself and hold the write side of
434 * the lock.
435 */
436 if (in_task()) {
d0fb18b5
AG
437 if (down_read_trylock(&adev->reset_domain->sem))
438 up_read(&adev->reset_domain->sem);
56b53c0b 439 else
d0fb18b5 440 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
441 }
442#endif
443 return false;
444}
445
e3ecdffa 446/**
f7ee1874 447 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
448 *
449 * @adev: amdgpu_device pointer
450 * @reg: dword aligned register offset
451 * @acc_flags: access flags which require special behavior
452 *
453 * Returns the 32 bit value from the offset specified.
454 */
f7ee1874
HZ
455uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
456 uint32_t reg, uint32_t acc_flags)
d38ceaf9 457{
f4b373f4
TSD
458 uint32_t ret;
459
56b53c0b 460 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
461 return 0;
462
f7ee1874
HZ
463 if ((reg * 4) < adev->rmmio_size) {
464 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
465 amdgpu_sriov_runtime(adev) &&
d0fb18b5 466 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 467 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 468 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
469 } else {
470 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
471 }
472 } else {
473 ret = adev->pcie_rreg(adev, reg * 4);
81202807 474 }
bc992ba5 475
f7ee1874 476 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 477
f4b373f4 478 return ret;
d38ceaf9
AD
479}
480
421a2a30
ML
481/*
 482 * MMIO register read helper with byte offset
 483 * @offset: byte offset from MMIO start
484 *
485*/
486
e3ecdffa
AD
487/**
488 * amdgpu_mm_rreg8 - read a memory mapped IO register
489 *
490 * @adev: amdgpu_device pointer
491 * @offset: byte aligned register offset
492 *
493 * Returns the 8 bit value from the offset specified.
494 */
7cbbc745
AG
495uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
496{
56b53c0b 497 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
498 return 0;
499
421a2a30
ML
500 if (offset < adev->rmmio_size)
501 return (readb(adev->rmmio + offset));
502 BUG();
503}
504
505/*
 506 * MMIO register write helper with byte offset
 507 * @offset: byte offset from MMIO start
 508 * @value: the value to be written to the register
509 *
510*/
e3ecdffa
AD
511/**
 512 * amdgpu_mm_wreg8 - write a memory mapped IO register
513 *
514 * @adev: amdgpu_device pointer
515 * @offset: byte aligned register offset
516 * @value: 8 bit value to write
517 *
518 * Writes the value specified to the offset specified.
519 */
7cbbc745
AG
520void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
521{
56b53c0b 522 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
523 return;
524
421a2a30
ML
525 if (offset < adev->rmmio_size)
526 writeb(value, adev->rmmio + offset);
527 else
528 BUG();
529}
530
e3ecdffa 531/**
f7ee1874 532 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
533 *
534 * @adev: amdgpu_device pointer
535 * @reg: dword aligned register offset
536 * @v: 32 bit value to write to the register
537 * @acc_flags: access flags which require special behavior
538 *
539 * Writes the value specified to the offset specified.
540 */
f7ee1874
HZ
541void amdgpu_device_wreg(struct amdgpu_device *adev,
542 uint32_t reg, uint32_t v,
543 uint32_t acc_flags)
d38ceaf9 544{
56b53c0b 545 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
546 return;
547
f7ee1874
HZ
548 if ((reg * 4) < adev->rmmio_size) {
549 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
550 amdgpu_sriov_runtime(adev) &&
d0fb18b5 551 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 552 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 553 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
554 } else {
555 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
556 }
557 } else {
558 adev->pcie_wreg(adev, reg * 4, v);
81202807 559 }
bc992ba5 560
f7ee1874 561 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 562}
d38ceaf9 563
03f2abb0 564/**
4cc9f86f 565 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 566 *
71579346
RB
567 * @adev: amdgpu_device pointer
568 * @reg: mmio/rlc register
569 * @v: value to write
570 *
571 * this function is invoked only for the debugfs register access
03f2abb0 572 */
f7ee1874
HZ
573void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
574 uint32_t reg, uint32_t v)
2e0cc4d4 575{
56b53c0b 576 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
577 return;
578
2e0cc4d4 579 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
580 adev->gfx.rlc.funcs &&
581 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 582 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
1b2dc99e 583 return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
4cc9f86f
TSD
584 } else if ((reg * 4) >= adev->rmmio_size) {
585 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
586 } else {
587 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 588 }
d38ceaf9
AD
589}
590
d38ceaf9
AD
591/**
592 * amdgpu_mm_rdoorbell - read a doorbell dword
593 *
594 * @adev: amdgpu_device pointer
595 * @index: doorbell index
596 *
597 * Returns the value in the doorbell aperture at the
598 * requested doorbell index (CIK).
599 */
600u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
601{
56b53c0b 602 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
603 return 0;
604
d38ceaf9
AD
605 if (index < adev->doorbell.num_doorbells) {
606 return readl(adev->doorbell.ptr + index);
607 } else {
608 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
609 return 0;
610 }
611}
612
613/**
614 * amdgpu_mm_wdoorbell - write a doorbell dword
615 *
616 * @adev: amdgpu_device pointer
617 * @index: doorbell index
618 * @v: value to write
619 *
620 * Writes @v to the doorbell aperture at the
621 * requested doorbell index (CIK).
622 */
623void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
624{
56b53c0b 625 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
626 return;
627
d38ceaf9
AD
628 if (index < adev->doorbell.num_doorbells) {
629 writel(v, adev->doorbell.ptr + index);
630 } else {
631 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
632 }
633}
634
832be404
KW
635/**
636 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
637 *
638 * @adev: amdgpu_device pointer
639 * @index: doorbell index
640 *
641 * Returns the value in the doorbell aperture at the
642 * requested doorbell index (VEGA10+).
643 */
644u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
645{
56b53c0b 646 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
647 return 0;
648
832be404
KW
649 if (index < adev->doorbell.num_doorbells) {
650 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
651 } else {
652 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
653 return 0;
654 }
655}
656
657/**
658 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
659 *
660 * @adev: amdgpu_device pointer
661 * @index: doorbell index
662 * @v: value to write
663 *
664 * Writes @v to the doorbell aperture at the
665 * requested doorbell index (VEGA10+).
666 */
667void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
668{
56b53c0b 669 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
670 return;
671
832be404
KW
672 if (index < adev->doorbell.num_doorbells) {
673 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
674 } else {
675 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
676 }
677}
678
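/*
 * Editor's note: illustrative sketch only, not part of the driver source.
 * It shows the intended pairing of a ring's dword doorbell index with the
 * accessors above; per the comments above, the 64-bit variant is used on
 * VEGA10 and later, the 32-bit variant on older (CIK-era) parts.
 */
#if 0	/* example only */
static void example_kick_doorbell(struct amdgpu_device *adev,
				  u32 doorbell_index, u64 wptr)
{
	if (adev->asic_type >= CHIP_VEGA10)
		amdgpu_mm_wdoorbell64(adev, doorbell_index, wptr);
	else
		amdgpu_mm_wdoorbell(adev, doorbell_index, (u32)wptr);
}
#endif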
1bba3683
HZ
679/**
680 * amdgpu_device_indirect_rreg - read an indirect register
681 *
682 * @adev: amdgpu_device pointer
22f453fb 683 * @reg_addr: indirect register address to read from
1bba3683
HZ
684 *
685 * Returns the value of indirect register @reg_addr
686 */
687u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
688 u32 reg_addr)
689{
65ba96e9 690 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
691 void __iomem *pcie_index_offset;
692 void __iomem *pcie_data_offset;
65ba96e9
HZ
693 u32 r;
694
695 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
696 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
697
698 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
699 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
700 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
701
702 writel(reg_addr, pcie_index_offset);
703 readl(pcie_index_offset);
704 r = readl(pcie_data_offset);
705 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
706
707 return r;
708}
709
710/**
711 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
712 *
713 * @adev: amdgpu_device pointer
22f453fb 714 * @reg_addr: indirect register address to read from
1bba3683
HZ
715 *
716 * Returns the value of indirect register @reg_addr
717 */
718u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
719 u32 reg_addr)
720{
65ba96e9 721 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
722 void __iomem *pcie_index_offset;
723 void __iomem *pcie_data_offset;
65ba96e9
HZ
724 u64 r;
725
726 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
727 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
728
729 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
730 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
731 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
732
733 /* read low 32 bits */
734 writel(reg_addr, pcie_index_offset);
735 readl(pcie_index_offset);
736 r = readl(pcie_data_offset);
737 /* read high 32 bits */
738 writel(reg_addr + 4, pcie_index_offset);
739 readl(pcie_index_offset);
740 r |= ((u64)readl(pcie_data_offset) << 32);
741 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
742
743 return r;
744}
745
746/**
747 * amdgpu_device_indirect_wreg - write an indirect register address
748 *
749 * @adev: amdgpu_device pointer
750 * @pcie_index: mmio register offset
751 * @pcie_data: mmio register offset
752 * @reg_addr: indirect register offset
753 * @reg_data: indirect register data
754 *
755 */
756void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
757 u32 reg_addr, u32 reg_data)
758{
65ba96e9 759 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
760 void __iomem *pcie_index_offset;
761 void __iomem *pcie_data_offset;
762
65ba96e9
HZ
763 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
764 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
765
1bba3683
HZ
766 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
767 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
768 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
769
770 writel(reg_addr, pcie_index_offset);
771 readl(pcie_index_offset);
772 writel(reg_data, pcie_data_offset);
773 readl(pcie_data_offset);
774 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
775}
776
777/**
778 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
779 *
780 * @adev: amdgpu_device pointer
781 * @pcie_index: mmio register offset
782 * @pcie_data: mmio register offset
783 * @reg_addr: indirect register offset
784 * @reg_data: indirect register data
785 *
786 */
787void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
788 u32 reg_addr, u64 reg_data)
789{
65ba96e9 790 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
791 void __iomem *pcie_index_offset;
792 void __iomem *pcie_data_offset;
793
65ba96e9
HZ
794 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
795 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
796
1bba3683
HZ
797 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
798 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
799 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
800
801 /* write low 32 bits */
802 writel(reg_addr, pcie_index_offset);
803 readl(pcie_index_offset);
804 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
805 readl(pcie_data_offset);
806 /* write high 32 bits */
807 writel(reg_addr + 4, pcie_index_offset);
808 readl(pcie_index_offset);
809 writel((u32)(reg_data >> 32), pcie_data_offset);
810 readl(pcie_data_offset);
811 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
812}
813
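/*
 * Editor's note: illustrative sketch only, not part of the driver source.
 * The indirect accessors above are normally plumbed in as the
 * adev->pcie_rreg/pcie_wreg callbacks by SoC-specific code; this
 * hypothetical wrapper only shows the shape of such a hookup. Note that
 * amdgpu_device_rreg() above already converts the dword offset to a byte
 * address before calling adev->pcie_rreg().
 */
#if 0	/* example only */
static u32 example_soc_pcie_rreg(struct amdgpu_device *adev, u32 reg)
{
	/* index/data offsets are looked up via adev->nbio.funcs inside */
	return amdgpu_device_indirect_rreg(adev, reg);
}
#endif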
dabc114e
HZ
814/**
815 * amdgpu_device_get_rev_id - query device rev_id
816 *
817 * @adev: amdgpu_device pointer
818 *
819 * Return device rev_id
820 */
821u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
822{
823 return adev->nbio.funcs->get_rev_id(adev);
824}
825
d38ceaf9
AD
826/**
827 * amdgpu_invalid_rreg - dummy reg read function
828 *
982a820b 829 * @adev: amdgpu_device pointer
d38ceaf9
AD
830 * @reg: offset of register
831 *
832 * Dummy register read function. Used for register blocks
833 * that certain asics don't have (all asics).
834 * Returns the value in the register.
835 */
836static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
837{
838 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
839 BUG();
840 return 0;
841}
842
843/**
844 * amdgpu_invalid_wreg - dummy reg write function
845 *
982a820b 846 * @adev: amdgpu_device pointer
d38ceaf9
AD
847 * @reg: offset of register
848 * @v: value to write to the register
849 *
 850 * Dummy register write function. Used for register blocks
851 * that certain asics don't have (all asics).
852 */
853static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
854{
855 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
856 reg, v);
857 BUG();
858}
859
4fa1c6a6
TZ
860/**
861 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
862 *
982a820b 863 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
864 * @reg: offset of register
865 *
866 * Dummy register read function. Used for register blocks
867 * that certain asics don't have (all asics).
868 * Returns the value in the register.
869 */
870static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
871{
872 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
873 BUG();
874 return 0;
875}
876
877/**
878 * amdgpu_invalid_wreg64 - dummy reg write function
879 *
982a820b 880 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
881 * @reg: offset of register
882 * @v: value to write to the register
883 *
 884 * Dummy register write function. Used for register blocks
885 * that certain asics don't have (all asics).
886 */
887static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
888{
889 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
890 reg, v);
891 BUG();
892}
893
d38ceaf9
AD
894/**
895 * amdgpu_block_invalid_rreg - dummy reg read function
896 *
982a820b 897 * @adev: amdgpu_device pointer
d38ceaf9
AD
898 * @block: offset of instance
899 * @reg: offset of register
900 *
901 * Dummy register read function. Used for register blocks
902 * that certain asics don't have (all asics).
903 * Returns the value in the register.
904 */
905static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
906 uint32_t block, uint32_t reg)
907{
908 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
909 reg, block);
910 BUG();
911 return 0;
912}
913
914/**
915 * amdgpu_block_invalid_wreg - dummy reg write function
916 *
982a820b 917 * @adev: amdgpu_device pointer
d38ceaf9
AD
918 * @block: offset of instance
919 * @reg: offset of register
920 * @v: value to write to the register
921 *
 922 * Dummy register write function. Used for register blocks
923 * that certain asics don't have (all asics).
924 */
925static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
926 uint32_t block,
927 uint32_t reg, uint32_t v)
928{
929 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
930 reg, block, v);
931 BUG();
932}
933
4d2997ab
AD
934/**
935 * amdgpu_device_asic_init - Wrapper for atom asic_init
936 *
982a820b 937 * @adev: amdgpu_device pointer
4d2997ab
AD
938 *
939 * Does any asic specific work and then calls atom asic init.
940 */
941static int amdgpu_device_asic_init(struct amdgpu_device *adev)
942{
943 amdgpu_asic_pre_asic_init(adev);
944
85d1bcc6
HZ
945 if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
946 return amdgpu_atomfirmware_asic_init(adev, true);
947 else
948 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
4d2997ab
AD
949}
950
e3ecdffa 951/**
7ccfd79f 952 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 953 *
982a820b 954 * @adev: amdgpu_device pointer
e3ecdffa
AD
955 *
956 * Allocates a scratch page of VRAM for use by various things in the
957 * driver.
958 */
7ccfd79f 959static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 960{
7ccfd79f
CK
961 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
962 AMDGPU_GEM_DOMAIN_VRAM |
963 AMDGPU_GEM_DOMAIN_GTT,
964 &adev->mem_scratch.robj,
965 &adev->mem_scratch.gpu_addr,
966 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
967}
968
e3ecdffa 969/**
7ccfd79f 970 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 971 *
982a820b 972 * @adev: amdgpu_device pointer
e3ecdffa
AD
973 *
974 * Frees the VRAM scratch page.
975 */
7ccfd79f 976static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 977{
7ccfd79f 978 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
979}
980
981/**
9c3f2b54 982 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
983 *
984 * @adev: amdgpu_device pointer
985 * @registers: pointer to the register array
986 * @array_size: size of the register array
987 *
 988 * Programs an array of registers with AND and OR masks.
989 * This is a helper for setting golden registers.
990 */
9c3f2b54
AD
991void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
992 const u32 *registers,
993 const u32 array_size)
d38ceaf9
AD
994{
995 u32 tmp, reg, and_mask, or_mask;
996 int i;
997
998 if (array_size % 3)
999 return;
1000
 1001 for (i = 0; i < array_size; i += 3) {
1002 reg = registers[i + 0];
1003 and_mask = registers[i + 1];
1004 or_mask = registers[i + 2];
1005
1006 if (and_mask == 0xffffffff) {
1007 tmp = or_mask;
1008 } else {
1009 tmp = RREG32(reg);
1010 tmp &= ~and_mask;
e0d07657
HZ
1011 if (adev->family >= AMDGPU_FAMILY_AI)
1012 tmp |= (or_mask & and_mask);
1013 else
1014 tmp |= or_mask;
d38ceaf9
AD
1015 }
1016 WREG32(reg, tmp);
1017 }
1018}
1019
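/*
 * Editor's note: illustrative sketch only, not part of the driver source.
 * Golden register lists are flat arrays of {offset, and_mask, or_mask}
 * triples; an and_mask of 0xffffffff writes or_mask verbatim, anything else
 * is a read-modify-write of the masked bits. The offsets and values below
 * are placeholders, not real golden settings.
 */
#if 0	/* example only */
static const u32 example_golden_settings[] = {
	/* offset	and_mask	or_mask */
	0x0000260c,	0xffffffff,	0x00000800,	/* full overwrite */
	0x00002614,	0x0000ff00,	0x00001500,	/* masked update */
};

/* amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *					    ARRAY_SIZE(example_golden_settings));
 */
#endif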
e3ecdffa
AD
1020/**
1021 * amdgpu_device_pci_config_reset - reset the GPU
1022 *
1023 * @adev: amdgpu_device pointer
1024 *
1025 * Resets the GPU using the pci config reset sequence.
1026 * Only applicable to asics prior to vega10.
1027 */
8111c387 1028void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1029{
1030 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1031}
1032
af484df8
AD
1033/**
1034 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1035 *
1036 * @adev: amdgpu_device pointer
1037 *
1038 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1039 */
1040int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1041{
1042 return pci_reset_function(adev->pdev);
1043}
1044
d38ceaf9
AD
1045/*
1046 * GPU doorbell aperture helpers function.
1047 */
1048/**
06ec9070 1049 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
1050 *
1051 * @adev: amdgpu_device pointer
1052 *
1053 * Init doorbell driver information (CIK)
1054 * Returns 0 on success, error on failure.
1055 */
06ec9070 1056static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 1057{
6585661d 1058
705e519e
CK
1059 /* No doorbell on SI hardware generation */
1060 if (adev->asic_type < CHIP_BONAIRE) {
1061 adev->doorbell.base = 0;
1062 adev->doorbell.size = 0;
1063 adev->doorbell.num_doorbells = 0;
1064 adev->doorbell.ptr = NULL;
1065 return 0;
1066 }
1067
d6895ad3
CK
1068 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1069 return -EINVAL;
1070
22357775
AD
1071 amdgpu_asic_init_doorbell_index(adev);
1072
d38ceaf9
AD
1073 /* doorbell bar mapping */
1074 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1075 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1076
de33a329
JX
1077 if (adev->enable_mes) {
1078 adev->doorbell.num_doorbells =
1079 adev->doorbell.size / sizeof(u32);
1080 } else {
1081 adev->doorbell.num_doorbells =
1082 min_t(u32, adev->doorbell.size / sizeof(u32),
1083 adev->doorbell_index.max_assignment+1);
1084 if (adev->doorbell.num_doorbells == 0)
1085 return -EINVAL;
1086
1087 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
 1088 * paging queue doorbells use the second page. The
1089 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
1090 * doorbells are in the first page. So with paging queue enabled,
 1091 * the max num_doorbells should be increased by one page (0x400 in dwords)
1092 */
1093 if (adev->asic_type >= CHIP_VEGA10)
1094 adev->doorbell.num_doorbells += 0x400;
1095 }
ec3db8a6 1096
8972e5d2
CK
1097 adev->doorbell.ptr = ioremap(adev->doorbell.base,
1098 adev->doorbell.num_doorbells *
1099 sizeof(u32));
1100 if (adev->doorbell.ptr == NULL)
d38ceaf9 1101 return -ENOMEM;
d38ceaf9
AD
1102
1103 return 0;
1104}
1105
1106/**
06ec9070 1107 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
1108 *
1109 * @adev: amdgpu_device pointer
1110 *
1111 * Tear down doorbell driver information (CIK)
1112 */
06ec9070 1113static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1114{
1115 iounmap(adev->doorbell.ptr);
1116 adev->doorbell.ptr = NULL;
1117}
1118
22cb0164 1119
d38ceaf9
AD
1120
1121/*
06ec9070 1122 * amdgpu_device_wb_*()
455a7bc2 1123 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1124 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
1125 */
1126
1127/**
06ec9070 1128 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1129 *
1130 * @adev: amdgpu_device pointer
1131 *
1132 * Disables Writeback and frees the Writeback memory (all asics).
1133 * Used at driver shutdown.
1134 */
06ec9070 1135static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1136{
1137 if (adev->wb.wb_obj) {
a76ed485
AD
1138 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1139 &adev->wb.gpu_addr,
1140 (void **)&adev->wb.wb);
d38ceaf9
AD
1141 adev->wb.wb_obj = NULL;
1142 }
1143}
1144
1145/**
03f2abb0 1146 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1147 *
1148 * @adev: amdgpu_device pointer
1149 *
455a7bc2 1150 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1151 * Used at driver startup.
1152 * Returns 0 on success or an -error on failure.
1153 */
06ec9070 1154static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1155{
1156 int r;
1157
1158 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1159 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1160 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1161 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1162 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1163 (void **)&adev->wb.wb);
d38ceaf9
AD
1164 if (r) {
1165 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1166 return r;
1167 }
d38ceaf9
AD
1168
1169 adev->wb.num_wb = AMDGPU_MAX_WB;
1170 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1171
1172 /* clear wb memory */
73469585 1173 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1174 }
1175
1176 return 0;
1177}
1178
1179/**
131b4b36 1180 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1181 *
1182 * @adev: amdgpu_device pointer
1183 * @wb: wb index
1184 *
1185 * Allocate a wb slot for use by the driver (all asics).
1186 * Returns 0 on success or -EINVAL on failure.
1187 */
131b4b36 1188int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1189{
1190 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1191
97407b63 1192 if (offset < adev->wb.num_wb) {
7014285a 1193 __set_bit(offset, adev->wb.used);
63ae07ca 1194 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1195 return 0;
1196 } else {
1197 return -EINVAL;
1198 }
1199}
1200
d38ceaf9 1201/**
131b4b36 1202 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1203 *
1204 * @adev: amdgpu_device pointer
1205 * @wb: wb index
1206 *
1207 * Free a wb slot allocated for use by the driver (all asics)
1208 */
131b4b36 1209void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1210{
73469585 1211 wb >>= 3;
d38ceaf9 1212 if (wb < adev->wb.num_wb)
73469585 1213 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1214}
1215
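/*
 * Editor's note: illustrative sketch only, not part of the driver source.
 * Writeback slots are handed out as dword offsets into adev->wb.wb; a
 * typical consumer allocates a slot, hands the matching GPU address to an
 * engine, and frees the slot when done.
 */
#if 0	/* example only */
static int example_use_wb_slot(struct amdgpu_device *adev)
{
	u32 wb;
	int r = amdgpu_device_wb_get(adev, &wb);

	if (r)
		return r;

	adev->wb.wb[wb] = 0;	/* CPU view of the slot */
	/* GPU address of the same slot: adev->wb.gpu_addr + wb * 4 */

	amdgpu_device_wb_free(adev, wb);
	return 0;
}
#endif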
d6895ad3
CK
1216/**
1217 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1218 *
1219 * @adev: amdgpu_device pointer
1220 *
1221 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 1222 * to fail, but if any of the BARs is not accessible after the resize we abort
1223 * driver loading by returning -ENODEV.
1224 */
1225int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1226{
453f617a 1227 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1228 struct pci_bus *root;
1229 struct resource *res;
1230 unsigned i;
d6895ad3
CK
1231 u16 cmd;
1232 int r;
1233
0c03b912 1234 /* Bypass for VF */
1235 if (amdgpu_sriov_vf(adev))
1236 return 0;
1237
b7221f2b
AD
1238 /* skip if the bios has already enabled large BAR */
1239 if (adev->gmc.real_vram_size &&
1240 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1241 return 0;
1242
31b8adab
CK
1243 /* Check if the root BUS has 64bit memory resources */
1244 root = adev->pdev->bus;
1245 while (root->parent)
1246 root = root->parent;
1247
1248 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1249 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1250 res->start > 0x100000000ull)
1251 break;
1252 }
1253
1254 /* Trying to resize is pointless without a root hub window above 4GB */
1255 if (!res)
1256 return 0;
1257
453f617a
ND
1258 /* Limit the BAR size to what is available */
1259 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1260 rbar_size);
1261
d6895ad3
CK
1262 /* Disable memory decoding while we change the BAR addresses and size */
1263 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1264 pci_write_config_word(adev->pdev, PCI_COMMAND,
1265 cmd & ~PCI_COMMAND_MEMORY);
1266
1267 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 1268 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
1269 if (adev->asic_type >= CHIP_BONAIRE)
1270 pci_release_resource(adev->pdev, 2);
1271
1272 pci_release_resource(adev->pdev, 0);
1273
1274 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1275 if (r == -ENOSPC)
1276 DRM_INFO("Not enough PCI address space for a large BAR.");
1277 else if (r && r != -ENOTSUPP)
1278 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1279
1280 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1281
1282 /* When the doorbell or fb BAR isn't available we have no chance of
1283 * using the device.
1284 */
06ec9070 1285 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
1286 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1287 return -ENODEV;
1288
1289 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1290
1291 return 0;
1292}
a05502e5 1293
d38ceaf9
AD
1294/*
1295 * GPU helpers function.
1296 */
1297/**
39c640c0 1298 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1299 *
1300 * @adev: amdgpu_device pointer
1301 *
c836fec5
JQ
1302 * Check if the asic has been initialized (all asics) at driver startup
 1303 * or if post is needed because a hw reset was performed.
 1304 * Returns true if post is needed or false if not.
d38ceaf9 1305 */
39c640c0 1306bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1307{
1308 uint32_t reg;
1309
bec86378
ML
1310 if (amdgpu_sriov_vf(adev))
1311 return false;
1312
1313 if (amdgpu_passthrough(adev)) {
1da2c326
ML
 1314 /* for FIJI: In the whole-GPU pass-through virtualization case, after a VM reboot
 1315 * some old SMC firmware still needs the driver to do a vPost or the GPU hangs,
 1316 * while SMC firmware versions above 22.15 don't have this flaw, so force
 1317 * vPost for SMC firmware versions below 22.15
bec86378
ML
1318 */
1319 if (adev->asic_type == CHIP_FIJI) {
1320 int err;
1321 uint32_t fw_ver;
1322 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
 1323 /* force vPost if an error occurred */
1324 if (err)
1325 return true;
1326
1327 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1328 if (fw_ver < 0x00160e00)
1329 return true;
bec86378 1330 }
bec86378 1331 }
91fe77eb 1332
e3c1b071 1333 /* Don't post if we need to reset whole hive on init */
1334 if (adev->gmc.xgmi.pending_reset)
1335 return false;
1336
91fe77eb 1337 if (adev->has_hw_reset) {
1338 adev->has_hw_reset = false;
1339 return true;
1340 }
1341
1342 /* bios scratch used on CIK+ */
1343 if (adev->asic_type >= CHIP_BONAIRE)
1344 return amdgpu_atombios_scratch_need_asic_init(adev);
1345
1346 /* check MEM_SIZE for older asics */
1347 reg = amdgpu_asic_get_config_memsize(adev);
1348
1349 if ((reg != 0) && (reg != 0xffffffff))
1350 return false;
1351
1352 return true;
bec86378
ML
1353}
1354
0ab5d711
ML
1355/**
1356 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1357 *
1358 * @adev: amdgpu_device pointer
1359 *
1360 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1361 * be set for this device.
1362 *
1363 * Returns true if it should be used or false if not.
1364 */
1365bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1366{
1367 switch (amdgpu_aspm) {
1368 case -1:
1369 break;
1370 case 0:
1371 return false;
1372 case 1:
1373 return true;
1374 default:
1375 return false;
1376 }
1377 return pcie_aspm_enabled(adev->pdev);
1378}
1379
3ad5dcfe
KHF
1380bool amdgpu_device_aspm_support_quirk(void)
1381{
1382#if IS_ENABLED(CONFIG_X86)
1383 struct cpuinfo_x86 *c = &cpu_data(0);
1384
1385 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1386#else
1387 return true;
1388#endif
1389}
1390
d38ceaf9
AD
1391/* if we get transitioned to only one device, take VGA back */
1392/**
06ec9070 1393 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1394 *
bf44e8ce 1395 * @pdev: PCI device pointer
d38ceaf9
AD
1396 * @state: enable/disable vga decode
1397 *
1398 * Enable/disable vga decode (all asics).
1399 * Returns VGA resource flags.
1400 */
bf44e8ce
CH
1401static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1402 bool state)
d38ceaf9 1403{
bf44e8ce 1404 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
d38ceaf9
AD
1405 amdgpu_asic_set_vga_state(adev, state);
1406 if (state)
1407 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1408 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1409 else
1410 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1411}
1412
e3ecdffa
AD
1413/**
1414 * amdgpu_device_check_block_size - validate the vm block size
1415 *
1416 * @adev: amdgpu_device pointer
1417 *
1418 * Validates the vm block size specified via module parameter.
1419 * The vm block size defines number of bits in page table versus page directory,
1420 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1421 * page table and the remaining bits are in the page directory.
1422 */
06ec9070 1423static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1424{
1425 /* defines number of bits in page table versus page directory,
1426 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1427 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1428 if (amdgpu_vm_block_size == -1)
1429 return;
a1adf8be 1430
bab4fee7 1431 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1432 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1433 amdgpu_vm_block_size);
97489129 1434 amdgpu_vm_block_size = -1;
a1adf8be 1435 }
a1adf8be
CZ
1436}
1437
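/*
 * Editor's note: illustrative numbers only, not part of the driver source.
 * With 4KB pages there is a 12-bit in-page offset, so the minimum
 * amdgpu_vm_block_size of 9 enforced above means one page-table block maps
 * 2^9 pages = 2^21 bytes (2MB) of GPU virtual address space.
 */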
e3ecdffa
AD
1438/**
1439 * amdgpu_device_check_vm_size - validate the vm size
1440 *
1441 * @adev: amdgpu_device pointer
1442 *
1443 * Validates the vm size in GB specified via module parameter.
1444 * The VM size is the size of the GPU virtual memory space in GB.
1445 */
06ec9070 1446static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1447{
64dab074
AD
1448 /* no need to check the default value */
1449 if (amdgpu_vm_size == -1)
1450 return;
1451
83ca145d
ZJ
1452 if (amdgpu_vm_size < 1) {
1453 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1454 amdgpu_vm_size);
f3368128 1455 amdgpu_vm_size = -1;
83ca145d 1456 }
83ca145d
ZJ
1457}
1458
7951e376
RZ
1459static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1460{
1461 struct sysinfo si;
a9d4fe2f 1462 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1463 uint64_t total_memory;
1464 uint64_t dram_size_seven_GB = 0x1B8000000;
1465 uint64_t dram_size_three_GB = 0xB8000000;
1466
1467 if (amdgpu_smu_memory_pool_size == 0)
1468 return;
1469
1470 if (!is_os_64) {
1471 DRM_WARN("Not 64-bit OS, feature not supported\n");
1472 goto def_value;
1473 }
1474 si_meminfo(&si);
1475 total_memory = (uint64_t)si.totalram * si.mem_unit;
1476
1477 if ((amdgpu_smu_memory_pool_size == 1) ||
1478 (amdgpu_smu_memory_pool_size == 2)) {
1479 if (total_memory < dram_size_three_GB)
1480 goto def_value1;
1481 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1482 (amdgpu_smu_memory_pool_size == 8)) {
1483 if (total_memory < dram_size_seven_GB)
1484 goto def_value1;
1485 } else {
1486 DRM_WARN("Smu memory pool size not supported\n");
1487 goto def_value;
1488 }
1489 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1490
1491 return;
1492
1493def_value1:
1494 DRM_WARN("No enough system memory\n");
1495def_value:
1496 adev->pm.smu_prv_buffer_size = 0;
1497}
1498
9f6a7857
HR
1499static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1500{
1501 if (!(adev->flags & AMD_IS_APU) ||
1502 adev->asic_type < CHIP_RAVEN)
1503 return 0;
1504
1505 switch (adev->asic_type) {
1506 case CHIP_RAVEN:
1507 if (adev->pdev->device == 0x15dd)
1508 adev->apu_flags |= AMD_APU_IS_RAVEN;
1509 if (adev->pdev->device == 0x15d8)
1510 adev->apu_flags |= AMD_APU_IS_PICASSO;
1511 break;
1512 case CHIP_RENOIR:
1513 if ((adev->pdev->device == 0x1636) ||
1514 (adev->pdev->device == 0x164c))
1515 adev->apu_flags |= AMD_APU_IS_RENOIR;
1516 else
1517 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1518 break;
1519 case CHIP_VANGOGH:
1520 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1521 break;
1522 case CHIP_YELLOW_CARP:
1523 break;
d0f56dc2 1524 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1525 if ((adev->pdev->device == 0x13FE) ||
1526 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1527 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1528 break;
9f6a7857 1529 default:
4eaf21b7 1530 break;
9f6a7857
HR
1531 }
1532
1533 return 0;
1534}
1535
d38ceaf9 1536/**
06ec9070 1537 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1538 *
1539 * @adev: amdgpu_device pointer
1540 *
1541 * Validates certain module parameters and updates
1542 * the associated values used by the driver (all asics).
1543 */
912dfc84 1544static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1545{
5b011235
CZ
1546 if (amdgpu_sched_jobs < 4) {
1547 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1548 amdgpu_sched_jobs);
1549 amdgpu_sched_jobs = 4;
76117507 1550 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1551 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1552 amdgpu_sched_jobs);
1553 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1554 }
d38ceaf9 1555
83e74db6 1556 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1557 /* gart size must be greater or equal to 32M */
1558 dev_warn(adev->dev, "gart size (%d) too small\n",
1559 amdgpu_gart_size);
83e74db6 1560 amdgpu_gart_size = -1;
d38ceaf9
AD
1561 }
1562
36d38372 1563 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1564 /* gtt size must be greater or equal to 32M */
36d38372
CK
1565 dev_warn(adev->dev, "gtt size (%d) too small\n",
1566 amdgpu_gtt_size);
1567 amdgpu_gtt_size = -1;
d38ceaf9
AD
1568 }
1569
d07f14be
RH
1570 /* valid range is between 4 and 9 inclusive */
1571 if (amdgpu_vm_fragment_size != -1 &&
1572 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1573 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1574 amdgpu_vm_fragment_size = -1;
1575 }
1576
5d5bd5e3
KW
1577 if (amdgpu_sched_hw_submission < 2) {
1578 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1579 amdgpu_sched_hw_submission);
1580 amdgpu_sched_hw_submission = 2;
1581 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1582 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1583 amdgpu_sched_hw_submission);
1584 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1585 }
1586
2656fd23
AG
1587 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1588 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1589 amdgpu_reset_method = -1;
1590 }
1591
7951e376
RZ
1592 amdgpu_device_check_smu_prv_buffer_size(adev);
1593
06ec9070 1594 amdgpu_device_check_vm_size(adev);
d38ceaf9 1595
06ec9070 1596 amdgpu_device_check_block_size(adev);
6a7f76e7 1597
19aede77 1598 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1599
e3c00faa 1600 return 0;
d38ceaf9
AD
1601}
1602
1603/**
1604 * amdgpu_switcheroo_set_state - set switcheroo state
1605 *
1606 * @pdev: pci dev pointer
1694467b 1607 * @state: vga_switcheroo state
d38ceaf9 1608 *
12024b17 1609 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1610 * the asics before or after it is powered up using ACPI methods.
1611 */
8aba21b7
LT
1612static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1613 enum vga_switcheroo_state state)
d38ceaf9
AD
1614{
1615 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1616 int r;
d38ceaf9 1617
b98c6299 1618 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1619 return;
1620
1621 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1622 pr_info("switched on\n");
d38ceaf9
AD
1623 /* don't suspend or resume card normally */
1624 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1625
8f66090b
TZ
1626 pci_set_power_state(pdev, PCI_D0);
1627 amdgpu_device_load_pci_state(pdev);
1628 r = pci_enable_device(pdev);
de185019
AD
1629 if (r)
1630 DRM_WARN("pci_enable_device failed (%d)\n", r);
1631 amdgpu_device_resume(dev, true);
d38ceaf9 1632
d38ceaf9 1633 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1634 } else {
dd4fa6c1 1635 pr_info("switched off\n");
d38ceaf9 1636 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1637 amdgpu_device_suspend(dev, true);
8f66090b 1638 amdgpu_device_cache_pci_state(pdev);
de185019 1639 /* Shut down the device */
8f66090b
TZ
1640 pci_disable_device(pdev);
1641 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1642 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1643 }
1644}
1645
1646/**
1647 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1648 *
1649 * @pdev: pci dev pointer
1650 *
 1651 * Callback for the switcheroo driver. Check if the switcheroo
1652 * state can be changed.
1653 * Returns true if the state can be changed, false if not.
1654 */
1655static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1656{
1657 struct drm_device *dev = pci_get_drvdata(pdev);
1658
1659 /*
1660 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1661 * locking inversion with the driver load path. And the access here is
1662 * completely racy anyway. So don't bother with locking for now.
1663 */
7e13ad89 1664 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1665}
1666
1667static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1668 .set_gpu_state = amdgpu_switcheroo_set_state,
1669 .reprobe = NULL,
1670 .can_switch = amdgpu_switcheroo_can_switch,
1671};
1672
e3ecdffa
AD
1673/**
1674 * amdgpu_device_ip_set_clockgating_state - set the CG state
1675 *
87e3f136 1676 * @dev: amdgpu_device pointer
e3ecdffa
AD
1677 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1678 * @state: clockgating state (gate or ungate)
1679 *
1680 * Sets the requested clockgating state for all instances of
1681 * the hardware IP specified.
1682 * Returns the error code from the last instance.
1683 */
43fa561f 1684int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1685 enum amd_ip_block_type block_type,
1686 enum amd_clockgating_state state)
d38ceaf9 1687{
43fa561f 1688 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1689 int i, r = 0;
1690
1691 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1692 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1693 continue;
c722865a
RZ
1694 if (adev->ip_blocks[i].version->type != block_type)
1695 continue;
1696 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1697 continue;
1698 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1699 (void *)adev, state);
1700 if (r)
1701 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1702 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1703 }
1704 return r;
1705}
1706
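/*
 * Editor's note: illustrative sketch only, not part of the driver source.
 * A hedged example of requesting clockgating for all GFX IP instances;
 * callers in the driver pass the adev pointer through the void *dev
 * parameter.
 */
#if 0	/* example only */
static void example_gate_gfx_clocks(struct amdgpu_device *adev)
{
	int r = amdgpu_device_ip_set_clockgating_state(adev,
						       AMD_IP_BLOCK_TYPE_GFX,
						       AMD_CG_STATE_GATE);

	if (r)
		dev_warn(adev->dev, "GFX clockgating request failed (%d)\n", r);
}
#endif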
e3ecdffa
AD
1707/**
1708 * amdgpu_device_ip_set_powergating_state - set the PG state
1709 *
87e3f136 1710 * @dev: amdgpu_device pointer
e3ecdffa
AD
1711 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1712 * @state: powergating state (gate or ungate)
1713 *
1714 * Sets the requested powergating state for all instances of
1715 * the hardware IP specified.
1716 * Returns the error code from the last instance.
1717 */
43fa561f 1718int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1719 enum amd_ip_block_type block_type,
1720 enum amd_powergating_state state)
d38ceaf9 1721{
43fa561f 1722 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1723 int i, r = 0;
1724
1725 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1726 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1727 continue;
c722865a
RZ
1728 if (adev->ip_blocks[i].version->type != block_type)
1729 continue;
1730 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1731 continue;
1732 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1733 (void *)adev, state);
1734 if (r)
1735 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1736 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1737 }
1738 return r;
1739}
1740
e3ecdffa
AD
1741/**
1742 * amdgpu_device_ip_get_clockgating_state - get the CG state
1743 *
1744 * @adev: amdgpu_device pointer
1745 * @flags: clockgating feature flags
1746 *
1747 * Walks the list of IPs on the device and updates the clockgating
1748 * flags for each IP.
1749 * Updates @flags with the feature flags for each hardware IP where
1750 * clockgating is enabled.
1751 */
2990a1fc 1752void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1753 u64 *flags)
6cb2d4e4
HR
1754{
1755 int i;
1756
1757 for (i = 0; i < adev->num_ip_blocks; i++) {
1758 if (!adev->ip_blocks[i].status.valid)
1759 continue;
1760 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1761 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1762 }
1763}
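/*
 * Example usage (illustrative only): collect the clockgating feature flags
 * and test a specific bit, e.g. GFX medium grain clockgating.
 *
 *	u64 flags = 0;
 *
 *	amdgpu_device_ip_get_clockgating_state(adev, &flags);
 *	if (flags & AMD_CG_SUPPORT_GFX_MGCG)
 *		DRM_INFO("GFX MGCG is enabled\n");
 */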
1764
e3ecdffa
AD
1765/**
1766 * amdgpu_device_ip_wait_for_idle - wait for idle
1767 *
1768 * @adev: amdgpu_device pointer
1769 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1770 *
1771 * Waits for the requested hardware IP to be idle.
1772 * Returns 0 for success or a negative error code on failure.
1773 */
2990a1fc
AD
1774int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1775 enum amd_ip_block_type block_type)
5dbbb60b
AD
1776{
1777 int i, r;
1778
1779 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1780 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1781 continue;
a1255107
AD
1782 if (adev->ip_blocks[i].version->type == block_type) {
1783 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1784 if (r)
1785 return r;
1786 break;
1787 }
1788 }
1789 return 0;
1790
1791}
1792
e3ecdffa
AD
1793/**
1794 * amdgpu_device_ip_is_idle - is the hardware IP idle
1795 *
1796 * @adev: amdgpu_device pointer
1797 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1798 *
1799 * Check if the hardware IP is idle or not.
1800 * Returns true if the IP is idle, false if not.
1801 */
2990a1fc
AD
1802bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1803 enum amd_ip_block_type block_type)
5dbbb60b
AD
1804{
1805 int i;
1806
1807 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1808 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1809 continue;
a1255107
AD
1810 if (adev->ip_blocks[i].version->type == block_type)
1811 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1812 }
1813 return true;
1814
1815}
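/*
 * Example usage (illustrative only): before poking GFX state directly,
 * a caller could check the block and wait for it to go idle.
 *
 *	if (!amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GFX))
 *		r = amdgpu_device_ip_wait_for_idle(adev,
 *						   AMD_IP_BLOCK_TYPE_GFX);
 */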
1816
e3ecdffa
AD
1817/**
1818 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1819 *
1820 * @adev: amdgpu_device pointer
87e3f136 1821 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1822 *
1823 * Returns a pointer to the hardware IP block structure
1824 * if it exists for the asic, otherwise NULL.
1825 */
2990a1fc
AD
1826struct amdgpu_ip_block *
1827amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1828 enum amd_ip_block_type type)
d38ceaf9
AD
1829{
1830 int i;
1831
1832 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1833 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1834 return &adev->ip_blocks[i];
1835
1836 return NULL;
1837}
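/*
 * Example usage (illustrative only): look up the GFX IP block and report
 * its version.
 *
 *	struct amdgpu_ip_block *ip =
 *		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 *
 *	if (ip)
 *		DRM_INFO("GFX IP v%d.%d\n", ip->version->major,
 *			 ip->version->minor);
 */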
1838
1839/**
2990a1fc 1840 * amdgpu_device_ip_block_version_cmp - check an IP block's version
d38ceaf9
AD
1841 *
1842 * @adev: amdgpu_device pointer
5fc3aeeb 1843 * @type: enum amd_ip_block_type
d38ceaf9
AD
1844 * @major: major version
1845 * @minor: minor version
1846 *
1847 * Returns 0 if the IP block's version is equal to or greater than the
1848 * requested version, or 1 if it is smaller or the ip_block doesn't exist.
1849 */
2990a1fc
AD
1850int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1851 enum amd_ip_block_type type,
1852 u32 major, u32 minor)
d38ceaf9 1853{
2990a1fc 1854 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1855
a1255107
AD
1856 if (ip_block && ((ip_block->version->major > major) ||
1857 ((ip_block->version->major == major) &&
1858 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1859 return 0;
1860
1861 return 1;
1862}
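/*
 * Example usage (illustrative only): take a code path only when the SMC IP
 * block is at least version 11.0.
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC,
 *					       11, 0) == 0)
 *		(SMC 11.0 or newer is present)
 */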
1863
a1255107 1864/**
2990a1fc 1865 * amdgpu_device_ip_block_add - add an IP block to the device
a1255107
AD
1866 *
1867 * @adev: amdgpu_device pointer
1868 * @ip_block_version: pointer to the IP to add
1869 *
1870 * Adds the IP block driver information to the collection of IPs
1871 * on the asic.
1872 */
2990a1fc
AD
1873int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1874 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1875{
1876 if (!ip_block_version)
1877 return -EINVAL;
1878
7bd939d0
LG
1879 switch (ip_block_version->type) {
1880 case AMD_IP_BLOCK_TYPE_VCN:
1881 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1882 return 0;
1883 break;
1884 case AMD_IP_BLOCK_TYPE_JPEG:
1885 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1886 return 0;
1887 break;
1888 default:
1889 break;
1890 }
1891
e966a725 1892 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1893 ip_block_version->funcs->name);
1894
a1255107
AD
1895 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1896
1897 return 0;
1898}
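/*
 * Example usage (illustrative only): the per-ASIC set_ip_blocks helpers
 * register their IP list with calls of this form, e.g. on VI parts
 *
 *	r = amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 *	if (r)
 *		return r;
 */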
1899
e3ecdffa
AD
1900/**
1901 * amdgpu_device_enable_virtual_display - enable virtual display feature
1902 *
1903 * @adev: amdgpu_device pointer
1904 *
1905 * Enables the virtual display feature if the user has enabled it via
1906 * the module parameter virtual_display. This feature provides virtual
1907 * display hardware on headless boards or in virtualized environments.
1908 * This function parses and validates the configuration string specified by
1909 * the user and configures the virtual display configuration (number of
1910 * virtual connectors, crtcs, etc.) specified.
1911 */
483ef985 1912static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1913{
1914 adev->enable_virtual_display = false;
1915
1916 if (amdgpu_virtual_display) {
8f66090b 1917 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1918 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1919
1920 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1921 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1922 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1923 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1924 if (!strcmp("all", pciaddname)
1925 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1926 long num_crtc;
1927 int res = -1;
1928
9accf2fd 1929 adev->enable_virtual_display = true;
0f66356d
ED
1930
1931 if (pciaddname_tmp)
1932 res = kstrtol(pciaddname_tmp, 10,
1933 &num_crtc);
1934
1935 if (!res) {
1936 if (num_crtc < 1)
1937 num_crtc = 1;
1938 if (num_crtc > 6)
1939 num_crtc = 6;
1940 adev->mode_info.num_crtc = num_crtc;
1941 } else {
1942 adev->mode_info.num_crtc = 1;
1943 }
9accf2fd
ED
1944 break;
1945 }
1946 }
1947
0f66356d
ED
1948 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1949 amdgpu_virtual_display, pci_address_name,
1950 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1951
1952 kfree(pciaddstr);
1953 }
1954}
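/*
 * Example parameter values (placeholders, derived from the parsing above):
 * a semicolon-separated list of PCI addresses, each optionally followed by
 * a crtc count (clamped to 1..6), or "all" to match every device, e.g.
 *
 *	amdgpu.virtual_display=0000:03:00.0,2
 *	amdgpu.virtual_display=all,1
 */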
1955
25263da3
AD
1956void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1957{
1958 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1959 adev->mode_info.num_crtc = 1;
1960 adev->enable_virtual_display = true;
1961 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1962 adev->enable_virtual_display, adev->mode_info.num_crtc);
1963 }
1964}
1965
e3ecdffa
AD
1966/**
1967 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1968 *
1969 * @adev: amdgpu_device pointer
1970 *
1971 * Parses the asic configuration parameters specified in the gpu info
1972 * firmware and makes them available to the driver for use in configuring
1973 * the asic.
1974 * Returns 0 on success, -EINVAL on failure.
1975 */
e2a75f88
AD
1976static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1977{
e2a75f88 1978 const char *chip_name;
c0a43457 1979 char fw_name[40];
e2a75f88
AD
1980 int err;
1981 const struct gpu_info_firmware_header_v1_0 *hdr;
1982
ab4fe3e1
HR
1983 adev->firmware.gpu_info_fw = NULL;
1984
72de33f8 1985 if (adev->mman.discovery_bin) {
cc375d8c
TY
1986 /*
1987 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 1988 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
1989 * when DAL no longer needs it.
1990 */
1991 if (adev->asic_type != CHIP_NAVI12)
1992 return 0;
258620d0
AD
1993 }
1994
e2a75f88 1995 switch (adev->asic_type) {
e2a75f88
AD
1996 default:
1997 return 0;
1998 case CHIP_VEGA10:
1999 chip_name = "vega10";
2000 break;
3f76dced
AD
2001 case CHIP_VEGA12:
2002 chip_name = "vega12";
2003 break;
2d2e5e7e 2004 case CHIP_RAVEN:
54f78a76 2005 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2006 chip_name = "raven2";
54f78a76 2007 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2008 chip_name = "picasso";
54c4d17e
FX
2009 else
2010 chip_name = "raven";
2d2e5e7e 2011 break;
65e60f6e
LM
2012 case CHIP_ARCTURUS:
2013 chip_name = "arcturus";
2014 break;
42b325e5
XY
2015 case CHIP_NAVI12:
2016 chip_name = "navi12";
2017 break;
e2a75f88
AD
2018 }
2019
2020 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2021 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2022 if (err) {
2023 dev_err(adev->dev,
b31d3063 2024 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2025 fw_name);
2026 goto out;
2027 }
2028
ab4fe3e1 2029 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2030 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2031
2032 switch (hdr->version_major) {
2033 case 1:
2034 {
2035 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2036 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2037 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2038
cc375d8c
TY
2039 /*
2040 * Should be dropped when DAL no longer needs it.
2041 */
2042 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2043 goto parse_soc_bounding_box;
2044
b5ab16bf
AD
2045 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2046 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2047 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2048 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2049 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2050 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2051 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2052 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2053 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2054 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2055 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2056 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2057 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2058 adev->gfx.cu_info.max_waves_per_simd =
2059 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2060 adev->gfx.cu_info.max_scratch_slots_per_cu =
2061 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2062 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2063 if (hdr->version_minor >= 1) {
35c2e910
HZ
2064 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2065 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2066 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2067 adev->gfx.config.num_sc_per_sh =
2068 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2069 adev->gfx.config.num_packer_per_sc =
2070 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2071 }
ec51d3fa
XY
2072
2073parse_soc_bounding_box:
ec51d3fa
XY
2074 /*
2075 * soc bounding box info is not integrated in the discovery table,
258620d0 2076 * so we always need to parse it from the gpu info firmware when needed.
ec51d3fa 2077 */
48321c3d
HW
2078 if (hdr->version_minor == 2) {
2079 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2080 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2081 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2082 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2083 }
e2a75f88
AD
2084 break;
2085 }
2086 default:
2087 dev_err(adev->dev,
2088 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2089 err = -EINVAL;
2090 goto out;
2091 }
2092out:
e2a75f88
AD
2093 return err;
2094}
2095
e3ecdffa
AD
2096/**
2097 * amdgpu_device_ip_early_init - run early init for hardware IPs
2098 *
2099 * @adev: amdgpu_device pointer
2100 *
2101 * Early initialization pass for hardware IPs. The hardware IPs that make
2102 * up each asic are discovered and each IP's early_init callback is run. This
2103 * is the first stage in initializing the asic.
2104 * Returns 0 on success, negative error code on failure.
2105 */
06ec9070 2106static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2107{
901e2be2
AD
2108 struct drm_device *dev = adev_to_drm(adev);
2109 struct pci_dev *parent;
aaa36a97 2110 int i, r;
ced69502 2111 bool total;
d38ceaf9 2112
483ef985 2113 amdgpu_device_enable_virtual_display(adev);
a6be7570 2114
00a979f3 2115 if (amdgpu_sriov_vf(adev)) {
00a979f3 2116 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2117 if (r)
2118 return r;
00a979f3
WS
2119 }
2120
d38ceaf9 2121 switch (adev->asic_type) {
33f34802
KW
2122#ifdef CONFIG_DRM_AMDGPU_SI
2123 case CHIP_VERDE:
2124 case CHIP_TAHITI:
2125 case CHIP_PITCAIRN:
2126 case CHIP_OLAND:
2127 case CHIP_HAINAN:
295d0daf 2128 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2129 r = si_set_ip_blocks(adev);
2130 if (r)
2131 return r;
2132 break;
2133#endif
a2e73f56
AD
2134#ifdef CONFIG_DRM_AMDGPU_CIK
2135 case CHIP_BONAIRE:
2136 case CHIP_HAWAII:
2137 case CHIP_KAVERI:
2138 case CHIP_KABINI:
2139 case CHIP_MULLINS:
e1ad2d53 2140 if (adev->flags & AMD_IS_APU)
a2e73f56 2141 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2142 else
2143 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2144
2145 r = cik_set_ip_blocks(adev);
2146 if (r)
2147 return r;
2148 break;
2149#endif
da87c30b
AD
2150 case CHIP_TOPAZ:
2151 case CHIP_TONGA:
2152 case CHIP_FIJI:
2153 case CHIP_POLARIS10:
2154 case CHIP_POLARIS11:
2155 case CHIP_POLARIS12:
2156 case CHIP_VEGAM:
2157 case CHIP_CARRIZO:
2158 case CHIP_STONEY:
2159 if (adev->flags & AMD_IS_APU)
2160 adev->family = AMDGPU_FAMILY_CZ;
2161 else
2162 adev->family = AMDGPU_FAMILY_VI;
2163
2164 r = vi_set_ip_blocks(adev);
2165 if (r)
2166 return r;
2167 break;
d38ceaf9 2168 default:
63352b7f
AD
2169 r = amdgpu_discovery_set_ip_blocks(adev);
2170 if (r)
2171 return r;
2172 break;
d38ceaf9
AD
2173 }
2174
901e2be2
AD
2175 if (amdgpu_has_atpx() &&
2176 (amdgpu_is_atpx_hybrid() ||
2177 amdgpu_has_atpx_dgpu_power_cntl()) &&
2178 ((adev->flags & AMD_IS_APU) == 0) &&
2179 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2180 adev->flags |= AMD_IS_PX;
2181
85ac2021
AD
2182 if (!(adev->flags & AMD_IS_APU)) {
2183 parent = pci_upstream_bridge(adev->pdev);
2184 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2185 }
901e2be2 2186
c004d44e 2187 amdgpu_amdkfd_device_probe(adev);
1884734a 2188
3b94fb10 2189 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2190 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2191 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2192 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2193 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2194
ced69502 2195 total = true;
d38ceaf9
AD
2196 for (i = 0; i < adev->num_ip_blocks; i++) {
2197 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2198 DRM_ERROR("disabled ip block: %d <%s>\n",
2199 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2200 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2201 } else {
a1255107
AD
2202 if (adev->ip_blocks[i].version->funcs->early_init) {
2203 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2204 if (r == -ENOENT) {
a1255107 2205 adev->ip_blocks[i].status.valid = false;
2c1a2784 2206 } else if (r) {
a1255107
AD
2207 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2208 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2209 total = false;
2c1a2784 2210 } else {
a1255107 2211 adev->ip_blocks[i].status.valid = true;
2c1a2784 2212 }
974e6b64 2213 } else {
a1255107 2214 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2215 }
d38ceaf9 2216 }
21a249ca
AD
2217 /* get the vbios after the asic_funcs are set up */
2218 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2219 r = amdgpu_device_parse_gpu_info_fw(adev);
2220 if (r)
2221 return r;
2222
21a249ca
AD
2223 /* Read BIOS */
2224 if (!amdgpu_get_bios(adev))
2225 return -EINVAL;
2226
2227 r = amdgpu_atombios_init(adev);
2228 if (r) {
2229 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2230 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2231 return r;
2232 }
77eabc6f
PJZ
2233
2234 /* get pf2vf msg info at its earliest time */
2235 if (amdgpu_sriov_vf(adev))
2236 amdgpu_virt_init_data_exchange(adev);
2237
21a249ca 2238 }
d38ceaf9 2239 }
ced69502
ML
2240 if (!total)
2241 return -ENODEV;
d38ceaf9 2242
395d1fb9
NH
2243 adev->cg_flags &= amdgpu_cg_mask;
2244 adev->pg_flags &= amdgpu_pg_mask;
2245
d38ceaf9
AD
2246 return 0;
2247}
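/*
 * Note on the ip_block_mask handling above (illustrative): bit i of the
 * amdgpu.ip_block_mask module parameter keeps IP block i enabled; clearing
 * it skips that block. For example, booting with
 *
 *	amdgpu.ip_block_mask=0xfffffffd
 *
 * leaves IP block 1 marked invalid so the rest of its init is skipped.
 */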
2248
0a4f2520
RZ
2249static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2250{
2251 int i, r;
2252
2253 for (i = 0; i < adev->num_ip_blocks; i++) {
2254 if (!adev->ip_blocks[i].status.sw)
2255 continue;
2256 if (adev->ip_blocks[i].status.hw)
2257 continue;
2258 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2259 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2260 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2261 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2262 if (r) {
2263 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2264 adev->ip_blocks[i].version->funcs->name, r);
2265 return r;
2266 }
2267 adev->ip_blocks[i].status.hw = true;
2268 }
2269 }
2270
2271 return 0;
2272}
2273
2274static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2275{
2276 int i, r;
2277
2278 for (i = 0; i < adev->num_ip_blocks; i++) {
2279 if (!adev->ip_blocks[i].status.sw)
2280 continue;
2281 if (adev->ip_blocks[i].status.hw)
2282 continue;
2283 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2284 if (r) {
2285 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2286 adev->ip_blocks[i].version->funcs->name, r);
2287 return r;
2288 }
2289 adev->ip_blocks[i].status.hw = true;
2290 }
2291
2292 return 0;
2293}
2294
7a3e0bb2
RZ
2295static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2296{
2297 int r = 0;
2298 int i;
80f41f84 2299 uint32_t smu_version;
7a3e0bb2
RZ
2300
2301 if (adev->asic_type >= CHIP_VEGA10) {
2302 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2303 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2304 continue;
2305
e3c1b071 2306 if (!adev->ip_blocks[i].status.sw)
2307 continue;
2308
482f0e53
ML
2309 /* no need to do the fw loading again if already done */
2310 if (adev->ip_blocks[i].status.hw)
2311 break;
2312
53b3f8f4 2313 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2314 r = adev->ip_blocks[i].version->funcs->resume(adev);
2315 if (r) {
2316 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2317 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2318 return r;
2319 }
2320 } else {
2321 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2322 if (r) {
2323 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2324 adev->ip_blocks[i].version->funcs->name, r);
2325 return r;
7a3e0bb2 2326 }
7a3e0bb2 2327 }
482f0e53
ML
2328
2329 adev->ip_blocks[i].status.hw = true;
2330 break;
7a3e0bb2
RZ
2331 }
2332 }
482f0e53 2333
8973d9ec
ED
2334 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2335 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2336
80f41f84 2337 return r;
7a3e0bb2
RZ
2338}
2339
5fd8518d
AG
2340static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2341{
2342 long timeout;
2343 int r, i;
2344
2345 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2346 struct amdgpu_ring *ring = adev->rings[i];
2347
2348 /* No need to set up the GPU scheduler for rings that don't need it */
2349 if (!ring || ring->no_scheduler)
2350 continue;
2351
2352 switch (ring->funcs->type) {
2353 case AMDGPU_RING_TYPE_GFX:
2354 timeout = adev->gfx_timeout;
2355 break;
2356 case AMDGPU_RING_TYPE_COMPUTE:
2357 timeout = adev->compute_timeout;
2358 break;
2359 case AMDGPU_RING_TYPE_SDMA:
2360 timeout = adev->sdma_timeout;
2361 break;
2362 default:
2363 timeout = adev->video_timeout;
2364 break;
2365 }
2366
2367 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2368 ring->num_hw_submission, amdgpu_job_hang_limit,
8ab62eda
JG
2369 timeout, adev->reset_domain->wq,
2370 ring->sched_score, ring->name,
2371 adev->dev);
5fd8518d
AG
2372 if (r) {
2373 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2374 ring->name);
2375 return r;
2376 }
2377 }
2378
2379 return 0;
2380}
2381
2382
e3ecdffa
AD
2383/**
2384 * amdgpu_device_ip_init - run init for hardware IPs
2385 *
2386 * @adev: amdgpu_device pointer
2387 *
2388 * Main initialization pass for hardware IPs. The list of all the hardware
2389 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2390 * are run. sw_init initializes the software state associated with each IP
2391 * and hw_init initializes the hardware associated with each IP.
2392 * Returns 0 on success, negative error code on failure.
2393 */
06ec9070 2394static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2395{
2396 int i, r;
2397
c030f2e4 2398 r = amdgpu_ras_init(adev);
2399 if (r)
2400 return r;
2401
d38ceaf9 2402 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2403 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2404 continue;
a1255107 2405 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2406 if (r) {
a1255107
AD
2407 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2408 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2409 goto init_failed;
2c1a2784 2410 }
a1255107 2411 adev->ip_blocks[i].status.sw = true;
bfca0289 2412
c1c39032
AD
2413 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2414 /* need to do common hw init early so everything is set up for gmc */
2415 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2416 if (r) {
2417 DRM_ERROR("hw_init %d failed %d\n", i, r);
2418 goto init_failed;
2419 }
2420 adev->ip_blocks[i].status.hw = true;
2421 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2422 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2423 /* Try to reserve bad pages early */
2424 if (amdgpu_sriov_vf(adev))
2425 amdgpu_virt_exchange_data(adev);
2426
7ccfd79f 2427 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2428 if (r) {
7ccfd79f 2429 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2430 goto init_failed;
2c1a2784 2431 }
a1255107 2432 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2433 if (r) {
2434 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2435 goto init_failed;
2c1a2784 2436 }
06ec9070 2437 r = amdgpu_device_wb_init(adev);
2c1a2784 2438 if (r) {
06ec9070 2439 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2440 goto init_failed;
2c1a2784 2441 }
a1255107 2442 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2443
2444 /* right after GMC hw init, we create CSA */
8a1fbb4a 2445 if (amdgpu_mcbp) {
1e256e27 2446 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2447 AMDGPU_GEM_DOMAIN_VRAM |
2448 AMDGPU_GEM_DOMAIN_GTT,
2449 AMDGPU_CSA_SIZE);
2493664f
ML
2450 if (r) {
2451 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2452 goto init_failed;
2493664f
ML
2453 }
2454 }
d38ceaf9
AD
2455 }
2456 }
2457
c9ffa427 2458 if (amdgpu_sriov_vf(adev))
22c16d25 2459 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2460
533aed27
AG
2461 r = amdgpu_ib_pool_init(adev);
2462 if (r) {
2463 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2464 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2465 goto init_failed;
2466 }
2467
c8963ea4
RZ
2468 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2469 if (r)
72d3f592 2470 goto init_failed;
0a4f2520
RZ
2471
2472 r = amdgpu_device_ip_hw_init_phase1(adev);
2473 if (r)
72d3f592 2474 goto init_failed;
0a4f2520 2475
7a3e0bb2
RZ
2476 r = amdgpu_device_fw_loading(adev);
2477 if (r)
72d3f592 2478 goto init_failed;
7a3e0bb2 2479
0a4f2520
RZ
2480 r = amdgpu_device_ip_hw_init_phase2(adev);
2481 if (r)
72d3f592 2482 goto init_failed;
d38ceaf9 2483
121a2bc6
AG
2484 /*
2485 * retired pages will be loaded from eeprom and reserved here,
2486 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2487 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2488 * for I2C communication, which is only true at this point.
b82e65a9
GC
2489 *
2490 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2491 * failure from bad gpu situation and stop amdgpu init process
2492 * accordingly. For other failed cases, it will still release all
2493 * the resource and print error message, rather than returning one
2494 * negative value to upper level.
121a2bc6
AG
2495 *
2496 * Note: theoretically, this should be called before all vram allocations
2497 * to protect retired pages from being reused
2498 */
b82e65a9
GC
2499 r = amdgpu_ras_recovery_init(adev);
2500 if (r)
2501 goto init_failed;
121a2bc6 2502
cfbb6b00
AG
2503 /**
2504 * In case of XGMI grab extra reference for reset domain for this device
2505 */
a4c63caf 2506 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2507 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2508 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2509 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2510
dfd0287b
LH
2511 if (WARN_ON(!hive)) {
2512 r = -ENOENT;
2513 goto init_failed;
2514 }
2515
46c67660 2516 if (!hive->reset_domain ||
2517 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2518 r = -ENOENT;
2519 amdgpu_put_xgmi_hive(hive);
2520 goto init_failed;
2521 }
2522
2523 /* Drop the early temporary reset domain we created for device */
2524 amdgpu_reset_put_reset_domain(adev->reset_domain);
2525 adev->reset_domain = hive->reset_domain;
9dfa4860 2526 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2527 }
a4c63caf
AG
2528 }
2529 }
2530
5fd8518d
AG
2531 r = amdgpu_device_init_schedulers(adev);
2532 if (r)
2533 goto init_failed;
e3c1b071 2534
2535 /* Don't init kfd if whole hive need to be reset during init */
c004d44e 2536 if (!adev->gmc.xgmi.pending_reset)
e3c1b071 2537 amdgpu_amdkfd_device_init(adev);
c6332b97 2538
bd607166
KR
2539 amdgpu_fru_get_product_info(adev);
2540
72d3f592 2541init_failed:
c9ffa427 2542 if (amdgpu_sriov_vf(adev))
c6332b97 2543 amdgpu_virt_release_full_gpu(adev, true);
2544
72d3f592 2545 return r;
d38ceaf9
AD
2546}
2547
e3ecdffa
AD
2548/**
2549 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2550 *
2551 * @adev: amdgpu_device pointer
2552 *
2553 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2554 * this function before a GPU reset. If the value is retained after a
2555 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2556 */
06ec9070 2557static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2558{
2559 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2560}
2561
e3ecdffa
AD
2562/**
2563 * amdgpu_device_check_vram_lost - check if vram is valid
2564 *
2565 * @adev: amdgpu_device pointer
2566 *
2567 * Checks the reset magic value written to the gart pointer in VRAM.
2568 * The driver calls this after a GPU reset to see if the contents of
2569 * VRAM have been lost or not.
2570 * returns true if vram is lost, false if not.
2571 */
06ec9070 2572static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2573{
dadce777
EQ
2574 if (memcmp(adev->gart.ptr, adev->reset_magic,
2575 AMDGPU_RESET_MAGIC_NUM))
2576 return true;
2577
53b3f8f4 2578 if (!amdgpu_in_reset(adev))
dadce777
EQ
2579 return false;
2580
2581 /*
2582 * For all ASICs with baco/mode1 reset, the VRAM is
2583 * always assumed to be lost.
2584 */
2585 switch (amdgpu_asic_reset_method(adev)) {
2586 case AMD_RESET_METHOD_BACO:
2587 case AMD_RESET_METHOD_MODE1:
2588 return true;
2589 default:
2590 return false;
2591 }
0c49e0b8
CZ
2592}
2593
e3ecdffa 2594/**
1112a46b 2595 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2596 *
2597 * @adev: amdgpu_device pointer
b8b72130 2598 * @state: clockgating state (gate or ungate)
e3ecdffa 2599 *
e3ecdffa 2600 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2601 * set_clockgating_state callbacks are run.
2602 * On late init this pass enables clockgating for hardware IPs;
2603 * on fini or suspend it disables clockgating.
e3ecdffa
AD
2604 * Returns 0 on success, negative error code on failure.
2605 */
fdd34271 2606
5d89bb2d
LL
2607int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2608 enum amd_clockgating_state state)
d38ceaf9 2609{
1112a46b 2610 int i, j, r;
d38ceaf9 2611
4a2ba394
SL
2612 if (amdgpu_emu_mode == 1)
2613 return 0;
2614
1112a46b
RZ
2615 for (j = 0; j < adev->num_ip_blocks; j++) {
2616 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2617 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2618 continue;
47198eb7 2619 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2620 if (adev->in_s0ix &&
47198eb7
AD
2621 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2622 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2623 continue;
4a446d55 2624 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2625 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2626 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2627 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2628 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2629 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2630 /* enable clockgating to save power */
a1255107 2631 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2632 state);
4a446d55
AD
2633 if (r) {
2634 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2635 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2636 return r;
2637 }
b0b00ff1 2638 }
d38ceaf9 2639 }
06b18f61 2640
c9f96fd5
RZ
2641 return 0;
2642}
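/*
 * Usage note (illustrative): this helper is driven with opposite states at
 * the two ends of the device lifecycle, e.g.
 *
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);	(late init)
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);	(fini/suspend)
 */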
2643
5d89bb2d
LL
2644int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2645 enum amd_powergating_state state)
c9f96fd5 2646{
1112a46b 2647 int i, j, r;
06b18f61 2648
c9f96fd5
RZ
2649 if (amdgpu_emu_mode == 1)
2650 return 0;
2651
1112a46b
RZ
2652 for (j = 0; j < adev->num_ip_blocks; j++) {
2653 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2654 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2655 continue;
47198eb7 2656 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2657 if (adev->in_s0ix &&
47198eb7
AD
2658 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2659 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2660 continue;
c9f96fd5
RZ
2661 /* skip CG for VCE/UVD, it's handled specially */
2662 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2663 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2664 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2665 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2666 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2667 /* enable powergating to save power */
2668 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2669 state);
c9f96fd5
RZ
2670 if (r) {
2671 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2672 adev->ip_blocks[i].version->funcs->name, r);
2673 return r;
2674 }
2675 }
2676 }
2dc80b00
S
2677 return 0;
2678}
2679
beff74bc
AD
2680static int amdgpu_device_enable_mgpu_fan_boost(void)
2681{
2682 struct amdgpu_gpu_instance *gpu_ins;
2683 struct amdgpu_device *adev;
2684 int i, ret = 0;
2685
2686 mutex_lock(&mgpu_info.mutex);
2687
2688 /*
2689 * MGPU fan boost feature should be enabled
2690 * only when there are two or more dGPUs in
2691 * the system
2692 */
2693 if (mgpu_info.num_dgpu < 2)
2694 goto out;
2695
2696 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2697 gpu_ins = &(mgpu_info.gpu_ins[i]);
2698 adev = gpu_ins->adev;
2699 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2700 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2701 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2702 if (ret)
2703 break;
2704
2705 gpu_ins->mgpu_fan_enabled = 1;
2706 }
2707 }
2708
2709out:
2710 mutex_unlock(&mgpu_info.mutex);
2711
2712 return ret;
2713}
2714
e3ecdffa
AD
2715/**
2716 * amdgpu_device_ip_late_init - run late init for hardware IPs
2717 *
2718 * @adev: amdgpu_device pointer
2719 *
2720 * Late initialization pass for hardware IPs. The list of all the hardware
2721 * IPs that make up the asic is walked and the late_init callbacks are run.
2722 * late_init covers any special initialization that an IP requires
2723 * after all of them have been initialized or something that needs to happen
2724 * late in the init process.
2725 * Returns 0 on success, negative error code on failure.
2726 */
06ec9070 2727static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2728{
60599a03 2729 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2730 int i = 0, r;
2731
2732 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2733 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2734 continue;
2735 if (adev->ip_blocks[i].version->funcs->late_init) {
2736 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2737 if (r) {
2738 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2739 adev->ip_blocks[i].version->funcs->name, r);
2740 return r;
2741 }
2dc80b00 2742 }
73f847db 2743 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2744 }
2745
867e24ca 2746 r = amdgpu_ras_late_init(adev);
2747 if (r) {
2748 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2749 return r;
2750 }
2751
a891d239
DL
2752 amdgpu_ras_set_error_query_ready(adev, true);
2753
1112a46b
RZ
2754 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2755 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2756
06ec9070 2757 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2758
beff74bc
AD
2759 r = amdgpu_device_enable_mgpu_fan_boost();
2760 if (r)
2761 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2762
4da8b639 2763 /* For passthrough configuration on arcturus and aldebaran, enable special SBR handling */
2764 if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2765 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2766 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2767
2768 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2769 mutex_lock(&mgpu_info.mutex);
2770
2771 /*
2772 * Reset device p-state to low as this was booted with high.
2773 *
2774 * This should be performed only after all devices from the same
2775 * hive get initialized.
2776 *
2777 * However, it's unknown in advance how many devices are in the hive,
2778 * as this is counted one by one during device initialization.
2779 *
2780 * So, we wait for all XGMI interlinked devices initialized.
2781 * This may bring some delays as those devices may come from
2782 * different hives. But that should be OK.
2783 */
2784 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2785 for (i = 0; i < mgpu_info.num_gpu; i++) {
2786 gpu_instance = &(mgpu_info.gpu_ins[i]);
2787 if (gpu_instance->adev->flags & AMD_IS_APU)
2788 continue;
2789
d84a430d
JK
2790 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2791 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2792 if (r) {
2793 DRM_ERROR("pstate setting failed (%d).\n", r);
2794 break;
2795 }
2796 }
2797 }
2798
2799 mutex_unlock(&mgpu_info.mutex);
2800 }
2801
d38ceaf9
AD
2802 return 0;
2803}
2804
613aa3ea
LY
2805/**
2806 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2807 *
2808 * @adev: amdgpu_device pointer
2809 *
2810 * For ASICs need to disable SMC first
2811 */
2812static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2813{
2814 int i, r;
2815
2816 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2817 return;
2818
2819 for (i = 0; i < adev->num_ip_blocks; i++) {
2820 if (!adev->ip_blocks[i].status.hw)
2821 continue;
2822 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2823 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2824 /* XXX handle errors */
2825 if (r) {
2826 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2827 adev->ip_blocks[i].version->funcs->name, r);
2828 }
2829 adev->ip_blocks[i].status.hw = false;
2830 break;
2831 }
2832 }
2833}
2834
e9669fb7 2835static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2836{
2837 int i, r;
2838
e9669fb7
AG
2839 for (i = 0; i < adev->num_ip_blocks; i++) {
2840 if (!adev->ip_blocks[i].version->funcs->early_fini)
2841 continue;
5278a159 2842
e9669fb7
AG
2843 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2844 if (r) {
2845 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2846 adev->ip_blocks[i].version->funcs->name, r);
2847 }
2848 }
c030f2e4 2849
05df1f01 2850 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2851 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2852
7270e895
TY
2853 amdgpu_amdkfd_suspend(adev, false);
2854
613aa3ea
LY
2855 /* Workaround for ASICs that need to disable SMC first */
2856 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2857
d38ceaf9 2858 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2859 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2860 continue;
8201a67a 2861
a1255107 2862 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2863 /* XXX handle errors */
2c1a2784 2864 if (r) {
a1255107
AD
2865 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2866 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2867 }
8201a67a 2868
a1255107 2869 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2870 }
2871
6effad8a
GC
2872 if (amdgpu_sriov_vf(adev)) {
2873 if (amdgpu_virt_release_full_gpu(adev, false))
2874 DRM_ERROR("failed to release exclusive mode on fini\n");
2875 }
2876
e9669fb7
AG
2877 return 0;
2878}
2879
2880/**
2881 * amdgpu_device_ip_fini - run fini for hardware IPs
2882 *
2883 * @adev: amdgpu_device pointer
2884 *
2885 * Main teardown pass for hardware IPs. The list of all the hardware
2886 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2887 * are run. hw_fini tears down the hardware associated with each IP
2888 * and sw_fini tears down any software state associated with each IP.
2889 * Returns 0 on success, negative error code on failure.
2890 */
2891static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2892{
2893 int i, r;
2894
2895 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2896 amdgpu_virt_release_ras_err_handler_data(adev);
2897
e9669fb7
AG
2898 if (adev->gmc.xgmi.num_physical_nodes > 1)
2899 amdgpu_xgmi_remove_device(adev);
2900
c004d44e 2901 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2902
d38ceaf9 2903 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2904 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2905 continue;
c12aba3a
ML
2906
2907 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2908 amdgpu_ucode_free_bo(adev);
1e256e27 2909 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 2910 amdgpu_device_wb_fini(adev);
7ccfd79f 2911 amdgpu_device_mem_scratch_fini(adev);
533aed27 2912 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2913 }
2914
a1255107 2915 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2916 /* XXX handle errors */
2c1a2784 2917 if (r) {
a1255107
AD
2918 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2919 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2920 }
a1255107
AD
2921 adev->ip_blocks[i].status.sw = false;
2922 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2923 }
2924
a6dcfd9c 2925 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2926 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2927 continue;
a1255107
AD
2928 if (adev->ip_blocks[i].version->funcs->late_fini)
2929 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2930 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2931 }
2932
c030f2e4 2933 amdgpu_ras_fini(adev);
2934
d38ceaf9
AD
2935 return 0;
2936}
2937
e3ecdffa 2938/**
beff74bc 2939 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2940 *
1112a46b 2941 * @work: work_struct.
e3ecdffa 2942 */
beff74bc 2943static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2944{
2945 struct amdgpu_device *adev =
beff74bc 2946 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2947 int r;
2948
2949 r = amdgpu_ib_ring_tests(adev);
2950 if (r)
2951 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2952}
2953
1e317b99
RZ
2954static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2955{
2956 struct amdgpu_device *adev =
2957 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2958
90a92662
MD
2959 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2960 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2961
2962 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2963 adev->gfx.gfx_off_state = true;
1e317b99
RZ
2964}
2965
e3ecdffa 2966/**
e7854a03 2967 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2968 *
2969 * @adev: amdgpu_device pointer
2970 *
2971 * Main suspend function for hardware IPs (phase 1). The list of all the
2972 * hardware IPs that make up the asic is walked, clockgating is disabled and
2973 * the suspend callbacks are run for the display blocks only. suspend puts the
2974 * hardware and software state of each IP into a state suitable for suspend.
2975 * Returns 0 on success, negative error code on failure.
2976 */
e7854a03
AD
2977static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2978{
2979 int i, r;
2980
50ec83f0
AD
2981 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2982 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2983
b31d6ada
EQ
2984 /*
2985 * Per the PMFW team's suggestion, the driver needs to disable the gfxoff
2986 * and df cstate features for the gpu reset (e.g. Mode1Reset)
2987 * scenario. Add the missing df cstate disablement here.
2988 */
2989 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2990 dev_warn(adev->dev, "Failed to disallow df cstate");
2991
e7854a03
AD
2992 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2993 if (!adev->ip_blocks[i].status.valid)
2994 continue;
2b9f7848 2995
e7854a03 2996 /* displays are handled separately */
2b9f7848
ND
2997 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2998 continue;
2999
3000 /* XXX handle errors */
3001 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3002 /* XXX handle errors */
3003 if (r) {
3004 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3005 adev->ip_blocks[i].version->funcs->name, r);
3006 return r;
e7854a03 3007 }
2b9f7848
ND
3008
3009 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3010 }
3011
e7854a03
AD
3012 return 0;
3013}
3014
3015/**
3016 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3017 *
3018 * @adev: amdgpu_device pointer
3019 *
3020 * Main suspend function for hardware IPs (phase 2). The list of all the
3021 * hardware IPs that make up the asic is walked, clockgating is disabled and
3022 * the suspend callbacks are run for all blocks except the displays. suspend
3023 * puts the hardware and software state of each IP into a state suitable for suspend.
3024 * Returns 0 on success, negative error code on failure.
3025 */
3026static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3027{
3028 int i, r;
3029
557f42a2 3030 if (adev->in_s0ix)
bc143d8b 3031 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3032
d38ceaf9 3033 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3034 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3035 continue;
e7854a03
AD
3036 /* displays are handled in phase1 */
3037 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3038 continue;
bff77e86
LM
3039 /* PSP lost connection when err_event_athub occurs */
3040 if (amdgpu_ras_intr_triggered() &&
3041 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3042 adev->ip_blocks[i].status.hw = false;
3043 continue;
3044 }
e3c1b071 3045
3046 /* skip unnecessary suspend if we do not initialize them yet */
3047 if (adev->gmc.xgmi.pending_reset &&
3048 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3049 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3050 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3051 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3052 adev->ip_blocks[i].status.hw = false;
3053 continue;
3054 }
557f42a2 3055
afa6646b 3056 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3057 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3058 * like at runtime. PSP is also part of the always on hardware
3059 * so no need to suspend it.
3060 */
557f42a2 3061 if (adev->in_s0ix &&
32ff160d 3062 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3063 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3064 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3065 continue;
3066
2a7798ea
AD
3067 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3068 if (adev->in_s0ix &&
3069 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3070 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3071 continue;
3072
e11c7750
TH
3073 /* During cold boot, swPSP provides the IMU and RLC FW binaries to TOS.
3074 * These are in TMR, and hence are expected to be reused by PSP-TOS to reload
3075 * from this location; RLC Autoload also automatically gets loaded
3076 * from here based on the PMFW -> PSP message during the re-init sequence.
3077 * Therefore, the psp suspend & resume should be skipped to avoid destroying
3078 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3079 */
3080 if (amdgpu_in_reset(adev) &&
3081 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3082 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3083 continue;
3084
d38ceaf9 3085 /* XXX handle errors */
a1255107 3086 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3087 /* XXX handle errors */
2c1a2784 3088 if (r) {
a1255107
AD
3089 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3090 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3091 }
876923fb 3092 adev->ip_blocks[i].status.hw = false;
a3a09142 3093 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
3094 if (!amdgpu_sriov_vf(adev)) {
3095 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3096 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3097 if (r) {
3098 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3099 adev->mp1_state, r);
3100 return r;
3101 }
a3a09142
AD
3102 }
3103 }
d38ceaf9
AD
3104 }
3105
3106 return 0;
3107}
3108
e7854a03
AD
3109/**
3110 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3111 *
3112 * @adev: amdgpu_device pointer
3113 *
3114 * Main suspend function for hardware IPs. The list of all the hardware
3115 * IPs that make up the asic is walked, clockgating is disabled and the
3116 * suspend callbacks are run. suspend puts the hardware and software state
3117 * in each IP into a state suitable for suspend.
3118 * Returns 0 on success, negative error code on failure.
3119 */
3120int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3121{
3122 int r;
3123
3c73683c
JC
3124 if (amdgpu_sriov_vf(adev)) {
3125 amdgpu_virt_fini_data_exchange(adev);
e7819644 3126 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3127 }
e7819644 3128
e7854a03
AD
3129 r = amdgpu_device_ip_suspend_phase1(adev);
3130 if (r)
3131 return r;
3132 r = amdgpu_device_ip_suspend_phase2(adev);
3133
e7819644
YT
3134 if (amdgpu_sriov_vf(adev))
3135 amdgpu_virt_release_full_gpu(adev, false);
3136
e7854a03
AD
3137 return r;
3138}
3139
06ec9070 3140static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3141{
3142 int i, r;
3143
2cb681b6 3144 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3145 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3146 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3147 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3148 AMD_IP_BLOCK_TYPE_IH,
3149 };
a90ad3c2 3150
95ea3dbc 3151 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3152 int j;
3153 struct amdgpu_ip_block *block;
a90ad3c2 3154
4cd2a96d
J
3155 block = &adev->ip_blocks[i];
3156 block->status.hw = false;
2cb681b6 3157
4cd2a96d 3158 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3159
4cd2a96d 3160 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3161 !block->status.valid)
3162 continue;
3163
3164 r = block->version->funcs->hw_init(adev);
0aaeefcc 3165 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3166 if (r)
3167 return r;
482f0e53 3168 block->status.hw = true;
a90ad3c2
ML
3169 }
3170 }
3171
3172 return 0;
3173}
3174
06ec9070 3175static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3176{
3177 int i, r;
3178
2cb681b6
ML
3179 static enum amd_ip_block_type ip_order[] = {
3180 AMD_IP_BLOCK_TYPE_SMC,
3181 AMD_IP_BLOCK_TYPE_DCE,
3182 AMD_IP_BLOCK_TYPE_GFX,
3183 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 3184 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
3185 AMD_IP_BLOCK_TYPE_VCE,
3186 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 3187 };
a90ad3c2 3188
2cb681b6
ML
3189 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3190 int j;
3191 struct amdgpu_ip_block *block;
a90ad3c2 3192
2cb681b6
ML
3193 for (j = 0; j < adev->num_ip_blocks; j++) {
3194 block = &adev->ip_blocks[j];
3195
3196 if (block->version->type != ip_order[i] ||
482f0e53
ML
3197 !block->status.valid ||
3198 block->status.hw)
2cb681b6
ML
3199 continue;
3200
895bd048
JZ
3201 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3202 r = block->version->funcs->resume(adev);
3203 else
3204 r = block->version->funcs->hw_init(adev);
3205
0aaeefcc 3206 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3207 if (r)
3208 return r;
482f0e53 3209 block->status.hw = true;
a90ad3c2
ML
3210 }
3211 }
3212
3213 return 0;
3214}
3215
e3ecdffa
AD
3216/**
3217 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3218 *
3219 * @adev: amdgpu_device pointer
3220 *
3221 * First resume function for hardware IPs. The list of all the hardware
3222 * IPs that make up the asic is walked and the resume callbacks are run for
3223 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3224 * after a suspend and updates the software state as necessary. This
3225 * function is also used for restoring the GPU after a GPU reset.
3226 * Returns 0 on success, negative error code on failure.
3227 */
06ec9070 3228static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3229{
3230 int i, r;
3231
a90ad3c2 3232 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3233 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3234 continue;
a90ad3c2 3235 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3236 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3237 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3238 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3239
fcf0649f
CZ
3240 r = adev->ip_blocks[i].version->funcs->resume(adev);
3241 if (r) {
3242 DRM_ERROR("resume of IP block <%s> failed %d\n",
3243 adev->ip_blocks[i].version->funcs->name, r);
3244 return r;
3245 }
482f0e53 3246 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3247 }
3248 }
3249
3250 return 0;
3251}
3252
e3ecdffa
AD
3253/**
3254 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3255 *
3256 * @adev: amdgpu_device pointer
3257 *
3258 * Second resume function for hardware IPs. The list of all the hardware
3259 * IPs that make up the asic is walked and the resume callbacks are run for
3260 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3261 * functional state after a suspend and updates the software state as
3262 * necessary. This function is also used for restoring the GPU after a GPU
3263 * reset.
3264 * Returns 0 on success, negative error code on failure.
3265 */
06ec9070 3266static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3267{
3268 int i, r;
3269
3270 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3271 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3272 continue;
fcf0649f 3273 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3274 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3275 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3276 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3277 continue;
a1255107 3278 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3279 if (r) {
a1255107
AD
3280 DRM_ERROR("resume of IP block <%s> failed %d\n",
3281 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3282 return r;
2c1a2784 3283 }
482f0e53 3284 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3285 }
3286
3287 return 0;
3288}
3289
e3ecdffa
AD
3290/**
3291 * amdgpu_device_ip_resume - run resume for hardware IPs
3292 *
3293 * @adev: amdgpu_device pointer
3294 *
3295 * Main resume function for hardware IPs. The hardware IPs
3296 * are split into two resume functions because they are
3297 * are also used in in recovering from a GPU reset and some additional
3298 * steps need to be take between them. In this case (S3/S4) they are
3299 * run sequentially.
3300 * Returns 0 on success, negative error code on failure.
3301 */
06ec9070 3302static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3303{
3304 int r;
3305
9cec53c1
JZ
3306 r = amdgpu_amdkfd_resume_iommu(adev);
3307 if (r)
3308 return r;
3309
06ec9070 3310 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3311 if (r)
3312 return r;
7a3e0bb2
RZ
3313
3314 r = amdgpu_device_fw_loading(adev);
3315 if (r)
3316 return r;
3317
06ec9070 3318 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3319
3320 return r;
3321}
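/*
 * Resume ordering sketch (summarizing the calls above, no new behavior):
 *
 *	amdgpu_amdkfd_resume_iommu()      - restore IOMMU state for KFD
 *	amdgpu_device_ip_resume_phase1()  - COMMON, GMC, IH (and PSP on SR-IOV)
 *	amdgpu_device_fw_loading()        - reload PSP/SMU firmware
 *	amdgpu_device_ip_resume_phase2()  - all remaining IP blocks
 */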
3322
e3ecdffa
AD
3323/**
3324 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3325 *
3326 * @adev: amdgpu_device pointer
3327 *
3328 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3329 */
4e99a44e 3330static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3331{
6867e1b5
ML
3332 if (amdgpu_sriov_vf(adev)) {
3333 if (adev->is_atom_fw) {
58ff791a 3334 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3335 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3336 } else {
3337 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3338 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3339 }
3340
3341 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3342 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3343 }
048765ad
AR
3344}
3345
e3ecdffa
AD
3346/**
3347 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3348 *
3349 * @asic_type: AMD asic type
3350 *
3351 * Check if there is DC (new modesetting infrastructure) support for an asic.
3352 * returns true if DC has support, false if not.
3353 */
4562236b
HW
3354bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3355{
3356 switch (asic_type) {
0637d417
AD
3357#ifdef CONFIG_DRM_AMDGPU_SI
3358 case CHIP_HAINAN:
3359#endif
3360 case CHIP_TOPAZ:
3361 /* chips with no display hardware */
3362 return false;
4562236b 3363#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3364 case CHIP_TAHITI:
3365 case CHIP_PITCAIRN:
3366 case CHIP_VERDE:
3367 case CHIP_OLAND:
2d32ffd6
AD
3368 /*
3369 * We have systems in the wild with these ASICs that require
3370 * LVDS and VGA support which is not supported with DC.
3371 *
 3372 * Fall back to the non-DC driver here by default so as not to
3373 * cause regressions.
3374 */
3375#if defined(CONFIG_DRM_AMD_DC_SI)
3376 return amdgpu_dc > 0;
3377#else
3378 return false;
64200c46 3379#endif
4562236b 3380 case CHIP_BONAIRE:
0d6fbccb 3381 case CHIP_KAVERI:
367e6687
AD
3382 case CHIP_KABINI:
3383 case CHIP_MULLINS:
d9fda248
HW
3384 /*
3385 * We have systems in the wild with these ASICs that require
b5a0168e 3386 * VGA support which is not supported with DC.
d9fda248
HW
3387 *
 3388 * Fall back to the non-DC driver here by default so as not to
3389 * cause regressions.
3390 */
3391 return amdgpu_dc > 0;
f7f12b25 3392 default:
fd187853 3393 return amdgpu_dc != 0;
f7f12b25 3394#else
4562236b 3395 default:
93b09a9a 3396 if (amdgpu_dc > 0)
044a48f4 3397 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3398 "but isn't supported by ASIC, ignoring\n");
4562236b 3399 return false;
f7f12b25 3400#endif
4562236b
HW
3401 }
3402}
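/*
 * Illustrative only: the checks above are driven by the amdgpu.dc module
 * parameter (name assumed from the amdgpu_dc variable). With the typical
 * modprobe syntax this behaves roughly as:
 *
 *	modprobe amdgpu dc=1	// force DC on, even on the legacy ASICs above
 *	modprobe amdgpu dc=0	// force the non-DC display path
 *
 * With the default auto setting the per-ASIC choices in this switch apply.
 */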
3403
3404/**
3405 * amdgpu_device_has_dc_support - check if dc is supported
3406 *
982a820b 3407 * @adev: amdgpu_device pointer
4562236b
HW
3408 *
3409 * Returns true for supported, false for not supported
3410 */
3411bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3412{
25263da3 3413 if (adev->enable_virtual_display ||
abaf210c 3414 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3415 return false;
3416
4562236b
HW
3417 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3418}
3419
d4535e2c
AG
3420static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3421{
3422 struct amdgpu_device *adev =
3423 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3424 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3425
c6a6e2db
AG
3426 /* It's a bug to not have a hive within this function */
3427 if (WARN_ON(!hive))
3428 return;
3429
3430 /*
3431 * Use task barrier to synchronize all xgmi reset works across the
3432 * hive. task_barrier_enter and task_barrier_exit will block
3433 * until all the threads running the xgmi reset works reach
3434 * those points. task_barrier_full will do both blocks.
3435 */
3436 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3437
3438 task_barrier_enter(&hive->tb);
4a580877 3439 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3440
3441 if (adev->asic_reset_res)
3442 goto fail;
3443
3444 task_barrier_exit(&hive->tb);
4a580877 3445 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3446
3447 if (adev->asic_reset_res)
3448 goto fail;
43c4d576 3449
5e67bba3 3450 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3451 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3452 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3453 } else {
3454
3455 task_barrier_full(&hive->tb);
3456 adev->asic_reset_res = amdgpu_asic_reset(adev);
3457 }
ce316fa5 3458
c6a6e2db 3459fail:
d4535e2c 3460 if (adev->asic_reset_res)
fed184e9 3461 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3462 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3463 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3464}
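/*
 * Illustrative sketch of the task_barrier pattern used above (helpers from
 * drm/task_barrier.h; do_step_one()/do_step_two() are placeholders, not real
 * functions): every node in the hive runs the same sequence, and the barrier
 * keeps each step aligned across all of them:
 *
 *	task_barrier_enter(&hive->tb);	// wait until every node arrives
 *	do_step_one(adev);		// e.g. BACO enter on every node
 *	task_barrier_exit(&hive->tb);	// wait again before moving on
 *	do_step_two(adev);		// e.g. BACO exit on every node
 *
 * task_barrier_full(&hive->tb) simply performs both waits in one call.
 */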
3465
71f98027
AD
3466static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3467{
3468 char *input = amdgpu_lockup_timeout;
3469 char *timeout_setting = NULL;
3470 int index = 0;
3471 long timeout;
3472 int ret = 0;
3473
3474 /*
67387dfe
AD
 3475 * By default the timeout is 10000 for non-compute jobs
 3476 * and 60000 for compute jobs.
71f98027 3477 * Under SR-IOV the compute timeout is 60000 only in
b7b2a316 3478 * one-VF (pp_one_vf) mode, otherwise it is 10000.
71f98027
AD
3479 */
3480 adev->gfx_timeout = msecs_to_jiffies(10000);
3481 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3482 if (amdgpu_sriov_vf(adev))
3483 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3484 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3485 else
67387dfe 3486 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3487
f440ff44 3488 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3489 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3490 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3491 ret = kstrtol(timeout_setting, 0, &timeout);
3492 if (ret)
3493 return ret;
3494
3495 if (timeout == 0) {
3496 index++;
3497 continue;
3498 } else if (timeout < 0) {
3499 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3500 dev_warn(adev->dev, "lockup timeout disabled");
3501 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3502 } else {
3503 timeout = msecs_to_jiffies(timeout);
3504 }
3505
3506 switch (index++) {
3507 case 0:
3508 adev->gfx_timeout = timeout;
3509 break;
3510 case 1:
3511 adev->compute_timeout = timeout;
3512 break;
3513 case 2:
3514 adev->sdma_timeout = timeout;
3515 break;
3516 case 3:
3517 adev->video_timeout = timeout;
3518 break;
3519 default:
3520 break;
3521 }
3522 }
3523 /*
3524 * There is only one value specified and
3525 * it should apply to all non-compute jobs.
3526 */
bcccee89 3527 if (index == 1) {
71f98027 3528 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3529 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3530 adev->compute_timeout = adev->gfx_timeout;
3531 }
71f98027
AD
3532 }
3533
3534 return ret;
3535}
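/*
 * Illustrative only (parameter name taken from amdgpu_lockup_timeout above):
 * the comma-separated amdgpu.lockup_timeout values map to engines by
 * position, for example
 *
 *	amdgpu.lockup_timeout=10000,20000,30000,40000
 *		gfx=10000ms, compute=20000ms, sdma=30000ms, video=40000ms
 *	amdgpu.lockup_timeout=5000
 *		all non-compute engines 5000ms (compute too under SR-IOV or
 *		passthrough)
 *	amdgpu.lockup_timeout=0,-1
 *		gfx keeps its default, the compute timeout is disabled
 *
 * A value of 0 keeps the default for that engine and a negative value
 * disables the timeout entirely.
 */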
d4535e2c 3536
4a74c38c
PY
3537/**
3538 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3539 *
3540 * @adev: amdgpu_device pointer
3541 *
 3542 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3543 */
3544static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3545{
3546 struct iommu_domain *domain;
3547
3548 domain = iommu_get_domain_for_dev(adev->dev);
3549 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3550 adev->ram_is_direct_mapped = true;
3551}
3552
77f3a5cd
ND
3553static const struct attribute *amdgpu_dev_attributes[] = {
3554 &dev_attr_product_name.attr,
3555 &dev_attr_product_number.attr,
3556 &dev_attr_serial_number.attr,
3557 &dev_attr_pcie_replay_count.attr,
3558 NULL
3559};
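/*
 * Illustrative only: sysfs_create_files() later in amdgpu_device_init()
 * registers these attributes on the device's kobject, so (assuming the usual
 * PCI sysfs layout) they appear as plain files such as
 *
 *	/sys/bus/pci/devices/<domain:bus:dev.fn>/product_name
 *	/sys/bus/pci/devices/<domain:bus:dev.fn>/serial_number
 *
 * which can be read with cat(1).
 */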
3560
d38ceaf9
AD
3561/**
3562 * amdgpu_device_init - initialize the driver
3563 *
3564 * @adev: amdgpu_device pointer
d38ceaf9
AD
3565 * @flags: driver flags
3566 *
3567 * Initializes the driver info and hw (all asics).
3568 * Returns 0 for success or an error on failure.
3569 * Called at driver startup.
3570 */
3571int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3572 uint32_t flags)
3573{
8aba21b7
LT
3574 struct drm_device *ddev = adev_to_drm(adev);
3575 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3576 int r, i;
b98c6299 3577 bool px = false;
95844d20 3578 u32 max_MBps;
d38ceaf9
AD
3579
3580 adev->shutdown = false;
d38ceaf9 3581 adev->flags = flags;
4e66d7d2
YZ
3582
3583 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3584 adev->asic_type = amdgpu_force_asic_type;
3585 else
3586 adev->asic_type = flags & AMD_ASIC_MASK;
3587
d38ceaf9 3588 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3589 if (amdgpu_emu_mode == 1)
8bdab6bb 3590 adev->usec_timeout *= 10;
770d13b1 3591 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3592 adev->accel_working = false;
3593 adev->num_rings = 0;
68ce8b24 3594 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3595 adev->mman.buffer_funcs = NULL;
3596 adev->mman.buffer_funcs_ring = NULL;
3597 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3598 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3599 adev->gmc.gmc_funcs = NULL;
7bd939d0 3600 adev->harvest_ip_mask = 0x0;
f54d1867 3601 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3602 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3603
3604 adev->smc_rreg = &amdgpu_invalid_rreg;
3605 adev->smc_wreg = &amdgpu_invalid_wreg;
3606 adev->pcie_rreg = &amdgpu_invalid_rreg;
3607 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3608 adev->pciep_rreg = &amdgpu_invalid_rreg;
3609 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3610 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3611 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3612 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3613 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3614 adev->didt_rreg = &amdgpu_invalid_rreg;
3615 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3616 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3617 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3618 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3619 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3620
3e39ab90
AD
3621 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3622 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3623 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3624
 3625 /* mutex initializations are all done here so we
 3626 * can recall functions without having locking issues */
0e5ca0d1 3627 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3628 mutex_init(&adev->pm.mutex);
3629 mutex_init(&adev->gfx.gpu_clock_mutex);
3630 mutex_init(&adev->srbm_mutex);
b8866c26 3631 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3632 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3633 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3634 mutex_init(&adev->mn_lock);
e23b74aa 3635 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3636 hash_init(adev->mn_hash);
32eaeae0 3637 mutex_init(&adev->psp.mutex);
bd052211 3638 mutex_init(&adev->notifier_lock);
8cda7a4f 3639 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3640 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3641
ab3b9de6 3642 amdgpu_device_init_apu_flags(adev);
9f6a7857 3643
912dfc84
EQ
3644 r = amdgpu_device_check_arguments(adev);
3645 if (r)
3646 return r;
d38ceaf9 3647
d38ceaf9
AD
3648 spin_lock_init(&adev->mmio_idx_lock);
3649 spin_lock_init(&adev->smc_idx_lock);
3650 spin_lock_init(&adev->pcie_idx_lock);
3651 spin_lock_init(&adev->uvd_ctx_idx_lock);
3652 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3653 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3654 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3655 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3656 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3657
0c4e7fa5
CZ
3658 INIT_LIST_HEAD(&adev->shadow_list);
3659 mutex_init(&adev->shadow_list_lock);
3660
655ce9cb 3661 INIT_LIST_HEAD(&adev->reset_list);
3662
6492e1b0 3663 INIT_LIST_HEAD(&adev->ras_list);
3664
beff74bc
AD
3665 INIT_DELAYED_WORK(&adev->delayed_init_work,
3666 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3667 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3668 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3669
d4535e2c
AG
3670 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3671
d23ee13f 3672 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3673 adev->gfx.gfx_off_residency = 0;
3674 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3675 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3676
b265bdbd
EQ
3677 atomic_set(&adev->throttling_logging_enabled, 1);
3678 /*
3679 * If throttling continues, logging will be performed every minute
3680 * to avoid log flooding. "-1" is subtracted since the thermal
3681 * throttling interrupt comes every second. Thus, the total logging
 3682 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3683 * for throttling interrupt) = 60 seconds.
3684 */
3685 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3686 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3687
0fa49558
AX
3688 /* Registers mapping */
3689 /* TODO: block userspace mapping of io register */
da69c161
KW
3690 if (adev->asic_type >= CHIP_BONAIRE) {
3691 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3692 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3693 } else {
3694 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3695 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3696 }
d38ceaf9 3697
6c08e0ef
EQ
3698 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3699 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3700
d38ceaf9
AD
3701 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3702 if (adev->rmmio == NULL) {
3703 return -ENOMEM;
3704 }
3705 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3706 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3707
5494d864
AD
3708 amdgpu_device_get_pcie_info(adev);
3709
b239c017
JX
3710 if (amdgpu_mcbp)
3711 DRM_INFO("MCBP is enabled\n");
3712
436afdfa
PY
3713 /*
 3714 * The reset domain needs to be present early, before the XGMI hive is
 3715 * discovered (if any) and initialized, so that the reset sem and in_gpu_reset
 3716 * flag can be used early during init and before any call to RREG32.
3717 */
3718 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3719 if (!adev->reset_domain)
3720 return -ENOMEM;
3721
3aa0115d
ML
3722 /* detect hw virtualization here */
3723 amdgpu_detect_virtualization(adev);
3724
dffa11b4
ML
3725 r = amdgpu_device_get_job_timeout_settings(adev);
3726 if (r) {
3727 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3728 return r;
a190d1c7
XY
3729 }
3730
d38ceaf9 3731 /* early init functions */
06ec9070 3732 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3733 if (r)
4ef87d8f 3734 return r;
d38ceaf9 3735
b7cdb41e
ML
3736 /* Get rid of things like offb */
3737 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3738 if (r)
3739 return r;
3740
4d33e704
SK
3741 /* Enable TMZ based on IP_VERSION */
3742 amdgpu_gmc_tmz_set(adev);
3743
957b0787 3744 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3745 /* Need to get xgmi info early to decide the reset behavior*/
3746 if (adev->gmc.xgmi.supported) {
3747 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3748 if (r)
3749 return r;
3750 }
3751
8e6d0b69 3752 /* enable PCIE atomic ops */
3753 if (amdgpu_sriov_vf(adev))
3754 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
e15c9d06 3755 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
8e6d0b69 3756 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3757 else
3758 adev->have_atomics_support =
3759 !pci_enable_atomic_ops_to_root(adev->pdev,
3760 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3761 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3762 if (!adev->have_atomics_support)
3763 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3764
6585661d
OZ
3765 /* doorbell bar mapping and doorbell index init*/
3766 amdgpu_device_doorbell_init(adev);
3767
9475a943
SL
3768 if (amdgpu_emu_mode == 1) {
3769 /* post the asic on emulation mode */
3770 emu_soc_asic_init(adev);
bfca0289 3771 goto fence_driver_init;
9475a943 3772 }
bfca0289 3773
04442bf7
LL
3774 amdgpu_reset_init(adev);
3775
4e99a44e
ML
3776 /* detect if we are with an SRIOV vbios */
3777 amdgpu_device_detect_sriov_bios(adev);
048765ad 3778
95e8e59e
AD
3779 /* check if we need to reset the asic
3780 * E.g., driver was not cleanly unloaded previously, etc.
3781 */
f14899fd 3782 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3783 if (adev->gmc.xgmi.num_physical_nodes) {
3784 dev_info(adev->dev, "Pending hive reset.\n");
3785 adev->gmc.xgmi.pending_reset = true;
3786 /* Only need to init necessary block for SMU to handle the reset */
3787 for (i = 0; i < adev->num_ip_blocks; i++) {
3788 if (!adev->ip_blocks[i].status.valid)
3789 continue;
3790 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3791 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3792 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3793 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3794 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3795 adev->ip_blocks[i].version->funcs->name);
3796 adev->ip_blocks[i].status.hw = true;
3797 }
3798 }
3799 } else {
3800 r = amdgpu_asic_reset(adev);
3801 if (r) {
3802 dev_err(adev->dev, "asic reset on init failed\n");
3803 goto failed;
3804 }
95e8e59e
AD
3805 }
3806 }
3807
d38ceaf9 3808 /* Post card if necessary */
39c640c0 3809 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3810 if (!adev->bios) {
bec86378 3811 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3812 r = -EINVAL;
3813 goto failed;
d38ceaf9 3814 }
bec86378 3815 DRM_INFO("GPU posting now...\n");
4d2997ab 3816 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3817 if (r) {
3818 dev_err(adev->dev, "gpu post error!\n");
3819 goto failed;
3820 }
d38ceaf9
AD
3821 }
3822
88b64e95
AD
3823 if (adev->is_atom_fw) {
3824 /* Initialize clocks */
3825 r = amdgpu_atomfirmware_get_clock_info(adev);
3826 if (r) {
3827 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3828 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3829 goto failed;
3830 }
3831 } else {
a5bde2f9
AD
3832 /* Initialize clocks */
3833 r = amdgpu_atombios_get_clock_info(adev);
3834 if (r) {
3835 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3836 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3837 goto failed;
a5bde2f9
AD
3838 }
3839 /* init i2c buses */
4562236b
HW
3840 if (!amdgpu_device_has_dc_support(adev))
3841 amdgpu_atombios_i2c_init(adev);
2c1a2784 3842 }
d38ceaf9 3843
bfca0289 3844fence_driver_init:
d38ceaf9 3845 /* Fence driver */
067f44c8 3846 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3847 if (r) {
067f44c8 3848 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3849 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3850 goto failed;
2c1a2784 3851 }
d38ceaf9
AD
3852
3853 /* init the mode config */
4a580877 3854 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3855
06ec9070 3856 r = amdgpu_device_ip_init(adev);
d38ceaf9 3857 if (r) {
8840a387 3858 /* failed in exclusive mode due to timeout */
3859 if (amdgpu_sriov_vf(adev) &&
3860 !amdgpu_sriov_runtime(adev) &&
3861 amdgpu_virt_mmio_blocked(adev) &&
3862 !amdgpu_virt_wait_reset(adev)) {
3863 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3864 /* Don't send request since VF is inactive. */
3865 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3866 adev->virt.ops = NULL;
8840a387 3867 r = -EAGAIN;
970fd197 3868 goto release_ras_con;
8840a387 3869 }
06ec9070 3870 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3871 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3872 goto release_ras_con;
d38ceaf9
AD
3873 }
3874
8d35a259
LG
3875 amdgpu_fence_driver_hw_init(adev);
3876
d69b8971
YZ
3877 dev_info(adev->dev,
3878 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3879 adev->gfx.config.max_shader_engines,
3880 adev->gfx.config.max_sh_per_se,
3881 adev->gfx.config.max_cu_per_sh,
3882 adev->gfx.cu_info.number);
3883
d38ceaf9
AD
3884 adev->accel_working = true;
3885
e59c0205
AX
3886 amdgpu_vm_check_compute_bug(adev);
3887
95844d20
MO
3888 /* Initialize the buffer migration limit. */
3889 if (amdgpu_moverate >= 0)
3890 max_MBps = amdgpu_moverate;
3891 else
3892 max_MBps = 8; /* Allow 8 MB/s. */
3893 /* Get a log2 for easy divisions. */
3894 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3895
d2f52ac8 3896 r = amdgpu_pm_sysfs_init(adev);
53e9d836
GC
3897 if (r)
3898 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
d2f52ac8 3899
5bb23532 3900 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3901 if (r) {
3902 adev->ucode_sysfs_en = false;
5bb23532 3903 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3904 } else
3905 adev->ucode_sysfs_en = true;
5bb23532 3906
8424f2cc
LG
3907 r = amdgpu_psp_sysfs_init(adev);
3908 if (r) {
3909 adev->psp_sysfs_en = false;
3910 if (!amdgpu_sriov_vf(adev))
3911 DRM_ERROR("Creating psp sysfs failed\n");
3912 } else
3913 adev->psp_sysfs_en = true;
3914
b0adca4d
EQ
3915 /*
3916 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 3917 * Otherwise the mgpu fan boost feature will be skipped because the
 3918 * gpu instance count would be too low.
3919 */
3920 amdgpu_register_gpu_instance(adev);
3921
d38ceaf9
AD
3922 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3923 * explicit gating rather than handling it automatically.
3924 */
e3c1b071 3925 if (!adev->gmc.xgmi.pending_reset) {
3926 r = amdgpu_device_ip_late_init(adev);
3927 if (r) {
3928 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3929 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3930 goto release_ras_con;
e3c1b071 3931 }
3932 /* must succeed. */
3933 amdgpu_ras_resume(adev);
3934 queue_delayed_work(system_wq, &adev->delayed_init_work,
3935 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3936 }
d38ceaf9 3937
2c738637
ML
3938 if (amdgpu_sriov_vf(adev))
3939 flush_delayed_work(&adev->delayed_init_work);
3940
77f3a5cd 3941 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3942 if (r)
77f3a5cd 3943 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3944
d155bef0
AB
3945 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3946 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3947 if (r)
3948 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3949
c1dd4aa6
AG
 3950 /* Keep the stored PCI config space at hand for restore after a sudden PCI error */
3951 if (amdgpu_device_cache_pci_state(adev->pdev))
3952 pci_restore_state(pdev);
3953
8c3dd61c
KHF
 3954 /* if we have more than one VGA card, then disable the amdgpu VGA resources */
3955 /* this will fail for cards that aren't VGA class devices, just
3956 * ignore it */
3957 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 3958 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 3959
d37a3929
OC
3960 px = amdgpu_device_supports_px(ddev);
3961
3962 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
3963 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
3964 vga_switcheroo_register_client(adev->pdev,
3965 &amdgpu_switcheroo_ops, px);
d37a3929
OC
3966
3967 if (px)
8c3dd61c 3968 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 3969
e3c1b071 3970 if (adev->gmc.xgmi.pending_reset)
3971 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3972 msecs_to_jiffies(AMDGPU_RESUME_MS));
3973
4a74c38c
PY
3974 amdgpu_device_check_iommu_direct_map(adev);
3975
d38ceaf9 3976 return 0;
83ba126a 3977
970fd197
SY
3978release_ras_con:
3979 amdgpu_release_ras_context(adev);
3980
83ba126a 3981failed:
89041940 3982 amdgpu_vf_error_trans_all(adev);
8840a387 3983
83ba126a 3984 return r;
d38ceaf9
AD
3985}
3986
07775fc1
AG
3987static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3988{
62d5f9f7 3989
07775fc1
AG
3990 /* Clear all CPU mappings pointing to this device */
3991 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3992
3993 /* Unmap all mapped bars - Doorbell, registers and VRAM */
3994 amdgpu_device_doorbell_fini(adev);
3995
3996 iounmap(adev->rmmio);
3997 adev->rmmio = NULL;
3998 if (adev->mman.aper_base_kaddr)
3999 iounmap(adev->mman.aper_base_kaddr);
4000 adev->mman.aper_base_kaddr = NULL;
4001
4002 /* Memory manager related */
4003 if (!adev->gmc.xgmi.connected_to_cpu) {
4004 arch_phys_wc_del(adev->gmc.vram_mtrr);
4005 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4006 }
4007}
4008
d38ceaf9 4009/**
bbe04dec 4010 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4011 *
4012 * @adev: amdgpu_device pointer
4013 *
4014 * Tear down the driver info (all asics).
4015 * Called at driver shutdown.
4016 */
72c8c97b 4017void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4018{
aac89168 4019 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4020 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4021 adev->shutdown = true;
9f875167 4022
752c683d
ML
 4023 /* make sure IB tests have finished before entering exclusive mode
 4024 * to avoid preemption on IB test
 4025 */
519b8b76 4026 if (amdgpu_sriov_vf(adev)) {
752c683d 4027 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4028 amdgpu_virt_fini_data_exchange(adev);
4029 }
752c683d 4030
e5b03032
ML
4031 /* disable all interrupts */
4032 amdgpu_irq_disable_all(adev);
ff97cba8 4033 if (adev->mode_info.mode_config_initialized){
1053b9c9 4034 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4035 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4036 else
4a580877 4037 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4038 }
8d35a259 4039 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4040
cd3a8a59 4041 if (adev->mman.initialized)
9bff18d1 4042 drain_workqueue(adev->mman.bdev.wq);
98f56188 4043
53e9d836 4044 if (adev->pm.sysfs_initialized)
7c868b59 4045 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4046 if (adev->ucode_sysfs_en)
4047 amdgpu_ucode_sysfs_fini(adev);
8424f2cc
LG
4048 if (adev->psp_sysfs_en)
4049 amdgpu_psp_sysfs_fini(adev);
72c8c97b
AG
4050 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4051
232d1d43
SY
4052 /* disable ras feature must before hw fini */
4053 amdgpu_ras_pre_fini(adev);
4054
e9669fb7 4055 amdgpu_device_ip_fini_early(adev);
d10d0daa 4056
a3848df6
YW
4057 amdgpu_irq_fini_hw(adev);
4058
b6fd6e0f
SK
4059 if (adev->mman.initialized)
4060 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4061
d10d0daa 4062 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4063
39934d3e
VP
4064 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4065 amdgpu_device_unmap_mmio(adev);
87172e89 4066
72c8c97b
AG
4067}
4068
4069void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4070{
62d5f9f7 4071 int idx;
d37a3929 4072 bool px;
62d5f9f7 4073
8d35a259 4074 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4075 amdgpu_device_ip_fini(adev);
b31d3063 4076 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4077 adev->accel_working = false;
68ce8b24 4078 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4079
4080 amdgpu_reset_fini(adev);
4081
d38ceaf9 4082 /* free i2c buses */
4562236b
HW
4083 if (!amdgpu_device_has_dc_support(adev))
4084 amdgpu_i2c_fini(adev);
bfca0289
SL
4085
4086 if (amdgpu_emu_mode != 1)
4087 amdgpu_atombios_fini(adev);
4088
d38ceaf9
AD
4089 kfree(adev->bios);
4090 adev->bios = NULL;
d37a3929
OC
4091
4092 px = amdgpu_device_supports_px(adev_to_drm(adev));
4093
4094 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4095 apple_gmux_detect(NULL, NULL)))
84c8b22e 4096 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4097
4098 if (px)
83ba126a 4099 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4100
38d6be81 4101 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4102 vga_client_unregister(adev->pdev);
e9bc1bf7 4103
62d5f9f7
LS
4104 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4105
4106 iounmap(adev->rmmio);
4107 adev->rmmio = NULL;
4108 amdgpu_device_doorbell_fini(adev);
4109 drm_dev_exit(idx);
4110 }
4111
d155bef0
AB
4112 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4113 amdgpu_pmu_fini(adev);
72de33f8 4114 if (adev->mman.discovery_bin)
a190d1c7 4115 amdgpu_discovery_fini(adev);
72c8c97b 4116
cfbb6b00
AG
4117 amdgpu_reset_put_reset_domain(adev->reset_domain);
4118 adev->reset_domain = NULL;
4119
72c8c97b
AG
4120 kfree(adev->pci_state);
4121
d38ceaf9
AD
4122}
4123
58144d28
ND
4124/**
4125 * amdgpu_device_evict_resources - evict device resources
4126 * @adev: amdgpu device object
4127 *
 4128 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4129 * of the vram memory type. Mainly used for evicting device resources
4130 * at suspend time.
4131 *
4132 */
7863c155 4133static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4134{
7863c155
ML
4135 int ret;
4136
e53d9665
ML
4137 /* No need to evict vram on APUs for suspend to ram or s2idle */
4138 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4139 return 0;
58144d28 4140
7863c155
ML
4141 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4142 if (ret)
58144d28 4143 DRM_WARN("evicting device resources failed\n");
7863c155 4144 return ret;
58144d28 4145}
d38ceaf9
AD
4146
4147/*
4148 * Suspend & resume.
4149 */
4150/**
810ddc3a 4151 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4152 *
87e3f136 4153 * @dev: drm dev pointer
87e3f136 4154 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4155 *
4156 * Puts the hw in the suspend state (all asics).
4157 * Returns 0 for success or an error on failure.
4158 * Called at driver suspend.
4159 */
de185019 4160int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4161{
a2e15b0e 4162 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4163 int r = 0;
d38ceaf9 4164
d38ceaf9
AD
4165 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4166 return 0;
4167
44779b43 4168 adev->in_suspend = true;
3fa8f89d 4169
47ea2076
SF
4170 /* Evict the majority of BOs before grabbing the full access */
4171 r = amdgpu_device_evict_resources(adev);
4172 if (r)
4173 return r;
4174
d7274ec7
BZ
4175 if (amdgpu_sriov_vf(adev)) {
4176 amdgpu_virt_fini_data_exchange(adev);
4177 r = amdgpu_virt_request_full_gpu(adev, false);
4178 if (r)
4179 return r;
4180 }
4181
3fa8f89d
S
4182 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4183 DRM_WARN("smart shift update failed\n");
4184
5f818173 4185 if (fbcon)
087451f3 4186 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4187
beff74bc 4188 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 4189
5e6932fe 4190 amdgpu_ras_suspend(adev);
4191
2196927b 4192 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4193
c004d44e 4194 if (!adev->in_s0ix)
5d3a2d95 4195 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4196
7863c155
ML
4197 r = amdgpu_device_evict_resources(adev);
4198 if (r)
4199 return r;
d38ceaf9 4200
8d35a259 4201 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4202
2196927b 4203 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4204
d7274ec7
BZ
4205 if (amdgpu_sriov_vf(adev))
4206 amdgpu_virt_release_full_gpu(adev, false);
4207
d38ceaf9
AD
4208 return 0;
4209}
4210
4211/**
810ddc3a 4212 * amdgpu_device_resume - initiate device resume
d38ceaf9 4213 *
87e3f136 4214 * @dev: drm dev pointer
87e3f136 4215 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4216 *
4217 * Bring the hw back to operating state (all asics).
4218 * Returns 0 for success or an error on failure.
4219 * Called at driver resume.
4220 */
de185019 4221int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4222{
1348969a 4223 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4224 int r = 0;
d38ceaf9 4225
d7274ec7
BZ
4226 if (amdgpu_sriov_vf(adev)) {
4227 r = amdgpu_virt_request_full_gpu(adev, true);
4228 if (r)
4229 return r;
4230 }
4231
d38ceaf9
AD
4232 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4233 return 0;
4234
62498733 4235 if (adev->in_s0ix)
bc143d8b 4236 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4237
d38ceaf9 4238 /* post card */
39c640c0 4239 if (amdgpu_device_need_post(adev)) {
4d2997ab 4240 r = amdgpu_device_asic_init(adev);
74b0b157 4241 if (r)
aac89168 4242 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4243 }
d38ceaf9 4244
06ec9070 4245 r = amdgpu_device_ip_resume(adev);
d7274ec7 4246
e6707218 4247 if (r) {
aac89168 4248 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4249 goto exit;
e6707218 4250 }
8d35a259 4251 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4252
06ec9070 4253 r = amdgpu_device_ip_late_init(adev);
03161a6e 4254 if (r)
3c22c1ea 4255 goto exit;
d38ceaf9 4256
beff74bc
AD
4257 queue_delayed_work(system_wq, &adev->delayed_init_work,
4258 msecs_to_jiffies(AMDGPU_RESUME_MS));
4259
c004d44e 4260 if (!adev->in_s0ix) {
5d3a2d95
AD
4261 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4262 if (r)
3c22c1ea 4263 goto exit;
5d3a2d95 4264 }
756e6880 4265
3c22c1ea
SF
4266exit:
4267 if (amdgpu_sriov_vf(adev)) {
4268 amdgpu_virt_init_data_exchange(adev);
4269 amdgpu_virt_release_full_gpu(adev, true);
4270 }
4271
4272 if (r)
4273 return r;
4274
96a5d8d4 4275 /* Make sure IB tests flushed */
beff74bc 4276 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4277
a2e15b0e 4278 if (fbcon)
087451f3 4279 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4280
5e6932fe 4281 amdgpu_ras_resume(adev);
4282
d09ef243
AD
4283 if (adev->mode_info.num_crtc) {
4284 /*
4285 * Most of the connector probing functions try to acquire runtime pm
4286 * refs to ensure that the GPU is powered on when connector polling is
4287 * performed. Since we're calling this from a runtime PM callback,
4288 * trying to acquire rpm refs will cause us to deadlock.
4289 *
4290 * Since we're guaranteed to be holding the rpm lock, it's safe to
4291 * temporarily disable the rpm helpers so this doesn't deadlock us.
4292 */
23a1a9e5 4293#ifdef CONFIG_PM
d09ef243 4294 dev->dev->power.disable_depth++;
23a1a9e5 4295#endif
d09ef243
AD
4296 if (!adev->dc_enabled)
4297 drm_helper_hpd_irq_event(dev);
4298 else
4299 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4300#ifdef CONFIG_PM
d09ef243 4301 dev->dev->power.disable_depth--;
23a1a9e5 4302#endif
d09ef243 4303 }
44779b43
RZ
4304 adev->in_suspend = false;
4305
dc907c9d
JX
4306 if (adev->enable_mes)
4307 amdgpu_mes_self_test(adev);
4308
3fa8f89d
S
4309 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4310 DRM_WARN("smart shift update failed\n");
4311
4d3b9ae5 4312 return 0;
d38ceaf9
AD
4313}
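/*
 * Illustrative sketch of a hypothetical caller (example_pmops_* are
 * placeholders, not driver code): the suspend/resume entry points above are
 * meant to be called from the PM callbacks with fbcon notification enabled,
 * assuming drvdata points at the drm_device:
 *
 *	static int example_pmops_suspend(struct device *dev)
 *	{
 *		struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *		return amdgpu_device_suspend(drm_dev, true);
 *	}
 *
 *	static int example_pmops_resume(struct device *dev)
 *	{
 *		struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *		return amdgpu_device_resume(drm_dev, true);
 *	}
 */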
4314
e3ecdffa
AD
4315/**
4316 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4317 *
4318 * @adev: amdgpu_device pointer
4319 *
4320 * The list of all the hardware IPs that make up the asic is walked and
4321 * the check_soft_reset callbacks are run. check_soft_reset determines
4322 * if the asic is still hung or not.
4323 * Returns true if any of the IPs are still in a hung state, false if not.
4324 */
06ec9070 4325static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4326{
4327 int i;
4328 bool asic_hang = false;
4329
f993d628
ML
4330 if (amdgpu_sriov_vf(adev))
4331 return true;
4332
8bc04c29
AD
4333 if (amdgpu_asic_need_full_reset(adev))
4334 return true;
4335
63fbf42f 4336 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4337 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4338 continue;
a1255107
AD
4339 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4340 adev->ip_blocks[i].status.hang =
4341 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4342 if (adev->ip_blocks[i].status.hang) {
aac89168 4343 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4344 asic_hang = true;
4345 }
4346 }
4347 return asic_hang;
4348}
4349
e3ecdffa
AD
4350/**
4351 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4352 *
4353 * @adev: amdgpu_device pointer
4354 *
4355 * The list of all the hardware IPs that make up the asic is walked and the
4356 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4357 * handles any IP specific hardware or software state changes that are
4358 * necessary for a soft reset to succeed.
4359 * Returns 0 on success, negative error code on failure.
4360 */
06ec9070 4361static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4362{
4363 int i, r = 0;
4364
4365 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4366 if (!adev->ip_blocks[i].status.valid)
d31a501e 4367 continue;
a1255107
AD
4368 if (adev->ip_blocks[i].status.hang &&
4369 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4370 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4371 if (r)
4372 return r;
4373 }
4374 }
4375
4376 return 0;
4377}
4378
e3ecdffa
AD
4379/**
4380 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4381 *
4382 * @adev: amdgpu_device pointer
4383 *
4384 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4385 * reset is necessary to recover.
4386 * Returns true if a full asic reset is required, false if not.
4387 */
06ec9070 4388static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4389{
da146d3b
AD
4390 int i;
4391
8bc04c29
AD
4392 if (amdgpu_asic_need_full_reset(adev))
4393 return true;
4394
da146d3b 4395 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4396 if (!adev->ip_blocks[i].status.valid)
da146d3b 4397 continue;
a1255107
AD
4398 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4399 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4400 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4401 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4402 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4403 if (adev->ip_blocks[i].status.hang) {
aac89168 4404 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4405 return true;
4406 }
4407 }
35d782fe
CZ
4408 }
4409 return false;
4410}
4411
e3ecdffa
AD
4412/**
4413 * amdgpu_device_ip_soft_reset - do a soft reset
4414 *
4415 * @adev: amdgpu_device pointer
4416 *
4417 * The list of all the hardware IPs that make up the asic is walked and the
4418 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4419 * IP specific hardware or software state changes that are necessary to soft
4420 * reset the IP.
4421 * Returns 0 on success, negative error code on failure.
4422 */
06ec9070 4423static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4424{
4425 int i, r = 0;
4426
4427 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4428 if (!adev->ip_blocks[i].status.valid)
35d782fe 4429 continue;
a1255107
AD
4430 if (adev->ip_blocks[i].status.hang &&
4431 adev->ip_blocks[i].version->funcs->soft_reset) {
4432 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4433 if (r)
4434 return r;
4435 }
4436 }
4437
4438 return 0;
4439}
4440
e3ecdffa
AD
4441/**
4442 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4443 *
4444 * @adev: amdgpu_device pointer
4445 *
4446 * The list of all the hardware IPs that make up the asic is walked and the
4447 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4448 * handles any IP specific hardware or software state changes that are
4449 * necessary after the IP has been soft reset.
4450 * Returns 0 on success, negative error code on failure.
4451 */
06ec9070 4452static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4453{
4454 int i, r = 0;
4455
4456 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4457 if (!adev->ip_blocks[i].status.valid)
35d782fe 4458 continue;
a1255107
AD
4459 if (adev->ip_blocks[i].status.hang &&
4460 adev->ip_blocks[i].version->funcs->post_soft_reset)
4461 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4462 if (r)
4463 return r;
4464 }
4465
4466 return 0;
4467}
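/*
 * Illustrative sketch of how the four soft-reset helpers above fit together;
 * the real sequence lives in amdgpu_device_pre_asic_reset() below:
 *
 *	if (amdgpu_device_ip_check_soft_reset(adev)) {
 *		amdgpu_device_ip_pre_soft_reset(adev);
 *		r = amdgpu_device_ip_soft_reset(adev);
 *		amdgpu_device_ip_post_soft_reset(adev);
 *		if (r || amdgpu_device_ip_check_soft_reset(adev))
 *			need_full_reset = true;	// fall back to a full ASIC reset
 *	}
 */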
4468
e3ecdffa 4469/**
c33adbc7 4470 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4471 *
4472 * @adev: amdgpu_device pointer
4473 *
4474 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4475 * restore things like GPUVM page tables after a GPU reset where
4476 * the contents of VRAM might be lost.
403009bf
CK
4477 *
4478 * Returns:
4479 * 0 on success, negative error code on failure.
e3ecdffa 4480 */
c33adbc7 4481static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4482{
c41d1cf6 4483 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4484 struct amdgpu_bo *shadow;
e18aaea7 4485 struct amdgpu_bo_vm *vmbo;
403009bf 4486 long r = 1, tmo;
c41d1cf6
ML
4487
4488 if (amdgpu_sriov_runtime(adev))
b045d3af 4489 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4490 else
4491 tmo = msecs_to_jiffies(100);
4492
aac89168 4493 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4494 mutex_lock(&adev->shadow_list_lock);
e18aaea7
ND
4495 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4496 shadow = &vmbo->bo;
403009bf 4497 /* No need to recover an evicted BO */
d3116756
CK
4498 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4499 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4500 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4501 continue;
4502
4503 r = amdgpu_bo_restore_shadow(shadow, &next);
4504 if (r)
4505 break;
4506
c41d1cf6 4507 if (fence) {
1712fb1a 4508 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4509 dma_fence_put(fence);
4510 fence = next;
1712fb1a 4511 if (tmo == 0) {
4512 r = -ETIMEDOUT;
c41d1cf6 4513 break;
1712fb1a 4514 } else if (tmo < 0) {
4515 r = tmo;
4516 break;
4517 }
403009bf
CK
4518 } else {
4519 fence = next;
c41d1cf6 4520 }
c41d1cf6
ML
4521 }
4522 mutex_unlock(&adev->shadow_list_lock);
4523
403009bf
CK
4524 if (fence)
4525 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4526 dma_fence_put(fence);
4527
1712fb1a 4528 if (r < 0 || tmo <= 0) {
aac89168 4529 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4530 return -EIO;
4531 }
c41d1cf6 4532
aac89168 4533 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4534 return 0;
c41d1cf6
ML
4535}
4536
a90ad3c2 4537
e3ecdffa 4538/**
06ec9070 4539 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4540 *
982a820b 4541 * @adev: amdgpu_device pointer
87e3f136 4542 * @from_hypervisor: request from hypervisor
5740682e
ML
4543 *
 4544 * Do a VF FLR and reinitialize the ASIC.
3f48c681 4545 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
4546 */
4547static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4548 bool from_hypervisor)
5740682e
ML
4549{
4550 int r;
a5f67c93 4551 struct amdgpu_hive_info *hive = NULL;
7258fa31 4552 int retry_limit = 0;
5740682e 4553
7258fa31 4554retry:
c004d44e 4555 amdgpu_amdkfd_pre_reset(adev);
428890a3 4556
5740682e
ML
4557 if (from_hypervisor)
4558 r = amdgpu_virt_request_full_gpu(adev, true);
4559 else
4560 r = amdgpu_virt_reset_gpu(adev);
4561 if (r)
4562 return r;
a90ad3c2
ML
4563
4564 /* Resume IP prior to SMC */
06ec9070 4565 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4566 if (r)
4567 goto error;
a90ad3c2 4568
c9ffa427 4569 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4570
7a3e0bb2
RZ
4571 r = amdgpu_device_fw_loading(adev);
4572 if (r)
4573 return r;
4574
a90ad3c2 4575 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4576 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4577 if (r)
4578 goto error;
a90ad3c2 4579
a5f67c93
ZL
4580 hive = amdgpu_get_xgmi_hive(adev);
4581 /* Update PSP FW topology after reset */
4582 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4583 r = amdgpu_xgmi_update_topology(hive, adev);
4584
4585 if (hive)
4586 amdgpu_put_xgmi_hive(hive);
4587
4588 if (!r) {
4589 amdgpu_irq_gpu_reset_resume_helper(adev);
4590 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4591
c004d44e 4592 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4593 }
a90ad3c2 4594
abc34253 4595error:
c41d1cf6 4596 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4597 amdgpu_inc_vram_lost(adev);
c33adbc7 4598 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4599 }
437f3e0b 4600 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4601
7258fa31
SK
4602 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4603 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4604 retry_limit++;
4605 goto retry;
4606 } else
4607 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4608 }
4609
a90ad3c2
ML
4610 return r;
4611}
4612
9a1cddd6 4613/**
4614 * amdgpu_device_has_job_running - check if there is any job in mirror list
4615 *
982a820b 4616 * @adev: amdgpu_device pointer
9a1cddd6 4617 *
4618 * check if there is any job in mirror list
4619 */
4620bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4621{
4622 int i;
4623 struct drm_sched_job *job;
4624
4625 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4626 struct amdgpu_ring *ring = adev->rings[i];
4627
4628 if (!ring || !ring->sched.thread)
4629 continue;
4630
4631 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4632 job = list_first_entry_or_null(&ring->sched.pending_list,
4633 struct drm_sched_job, list);
9a1cddd6 4634 spin_unlock(&ring->sched.job_list_lock);
4635 if (job)
4636 return true;
4637 }
4638 return false;
4639}
4640
12938fad
CK
4641/**
4642 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4643 *
982a820b 4644 * @adev: amdgpu_device pointer
12938fad
CK
4645 *
4646 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4647 * a hung GPU.
4648 */
4649bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4650{
12938fad 4651
3ba7b418
AG
4652 if (amdgpu_gpu_recovery == 0)
4653 goto disabled;
4654
1a11a65d
YC
4655 /* Skip soft reset check in fatal error mode */
4656 if (!amdgpu_ras_is_poison_mode_supported(adev))
4657 return true;
4658
3ba7b418
AG
4659 if (amdgpu_sriov_vf(adev))
4660 return true;
4661
4662 if (amdgpu_gpu_recovery == -1) {
4663 switch (adev->asic_type) {
b3523c45
AD
4664#ifdef CONFIG_DRM_AMDGPU_SI
4665 case CHIP_VERDE:
4666 case CHIP_TAHITI:
4667 case CHIP_PITCAIRN:
4668 case CHIP_OLAND:
4669 case CHIP_HAINAN:
4670#endif
4671#ifdef CONFIG_DRM_AMDGPU_CIK
4672 case CHIP_KAVERI:
4673 case CHIP_KABINI:
4674 case CHIP_MULLINS:
4675#endif
4676 case CHIP_CARRIZO:
4677 case CHIP_STONEY:
4678 case CHIP_CYAN_SKILLFISH:
3ba7b418 4679 goto disabled;
b3523c45
AD
4680 default:
4681 break;
3ba7b418 4682 }
12938fad
CK
4683 }
4684
4685 return true;
3ba7b418
AG
4686
4687disabled:
aac89168 4688 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4689 return false;
12938fad
CK
4690}
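/*
 * Illustrative only (behaviour read from the checks above, module parameter
 * name assumed to be amdgpu.gpu_recovery):
 *
 *	gpu_recovery=0	recovery disabled
 *	gpu_recovery=1	recovery enabled on all parts
 *	gpu_recovery=-1	auto (default): enabled, except on the legacy
 *			SI/CIK/APU parts listed in the switch above
 *
 * Unless recovery is explicitly disabled, SR-IOV VFs and ASICs without RAS
 * poison-mode support are always treated as recoverable.
 */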
4691
5c03e584
FX
4692int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4693{
4694 u32 i;
4695 int ret = 0;
4696
4697 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4698
4699 dev_info(adev->dev, "GPU mode1 reset\n");
4700
4701 /* disable BM */
4702 pci_clear_master(adev->pdev);
4703
4704 amdgpu_device_cache_pci_state(adev->pdev);
4705
4706 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4707 dev_info(adev->dev, "GPU smu mode1 reset\n");
4708 ret = amdgpu_dpm_mode1_reset(adev);
4709 } else {
4710 dev_info(adev->dev, "GPU psp mode1 reset\n");
4711 ret = psp_gpu_reset(adev);
4712 }
4713
4714 if (ret)
4715 dev_err(adev->dev, "GPU mode1 reset failed\n");
4716
4717 amdgpu_device_load_pci_state(adev->pdev);
4718
4719 /* wait for asic to come out of reset */
4720 for (i = 0; i < adev->usec_timeout; i++) {
4721 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4722
4723 if (memsize != 0xffffffff)
4724 break;
4725 udelay(1);
4726 }
4727
4728 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4729 return ret;
4730}
5c6dd71e 4731
e3c1b071 4732int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4733 struct amdgpu_reset_context *reset_context)
26bc5340 4734{
5c1e6fa4 4735 int i, r = 0;
04442bf7
LL
4736 struct amdgpu_job *job = NULL;
4737 bool need_full_reset =
4738 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4739
4740 if (reset_context->reset_req_dev == adev)
4741 job = reset_context->job;
71182665 4742
b602ca5f
TZ
4743 if (amdgpu_sriov_vf(adev)) {
4744 /* stop the data exchange thread */
4745 amdgpu_virt_fini_data_exchange(adev);
4746 }
4747
9e225fb9
AG
4748 amdgpu_fence_driver_isr_toggle(adev, true);
4749
71182665 4750 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4751 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4752 struct amdgpu_ring *ring = adev->rings[i];
4753
51687759 4754 if (!ring || !ring->sched.thread)
0875dc9e 4755 continue;
5740682e 4756
c530b02f
JZ
 4757 /* clear job fences from fence drv to avoid force_completion;
 4758 * leave NULL and vm flush fences in fence drv */
5c1e6fa4 4759 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4760
2f9d4084
ML
4761 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4762 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4763 }
d38ceaf9 4764
9e225fb9
AG
4765 amdgpu_fence_driver_isr_toggle(adev, false);
4766
ff99849b 4767 if (job && job->vm)
222b5f04
AG
4768 drm_sched_increase_karma(&job->base);
4769
04442bf7 4770 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b
LL
4771 /* If reset handler not implemented, continue; otherwise return */
4772 if (r == -ENOSYS)
4773 r = 0;
4774 else
04442bf7
LL
4775 return r;
4776
1d721ed6 4777 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4778 if (!amdgpu_sriov_vf(adev)) {
4779
4780 if (!need_full_reset)
4781 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4782
360cd081
LG
4783 if (!need_full_reset && amdgpu_gpu_recovery &&
4784 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4785 amdgpu_device_ip_pre_soft_reset(adev);
4786 r = amdgpu_device_ip_soft_reset(adev);
4787 amdgpu_device_ip_post_soft_reset(adev);
4788 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4789 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4790 need_full_reset = true;
4791 }
4792 }
4793
4794 if (need_full_reset)
4795 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4796 if (need_full_reset)
4797 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4798 else
4799 clear_bit(AMDGPU_NEED_FULL_RESET,
4800 &reset_context->flags);
26bc5340
AG
4801 }
4802
4803 return r;
4804}
4805
15fd09a0
SA
4806static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4807{
15fd09a0
SA
4808 int i;
4809
38a15ad9 4810 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4811
4812 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4813 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4814 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4815 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4816 }
4817
4818 return 0;
4819}
4820
3d8785f6
SA
4821#ifdef CONFIG_DEV_COREDUMP
4822static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4823 size_t count, void *data, size_t datalen)
4824{
4825 struct drm_printer p;
4826 struct amdgpu_device *adev = data;
4827 struct drm_print_iterator iter;
4828 int i;
4829
4830 iter.data = buffer;
4831 iter.offset = 0;
4832 iter.start = offset;
4833 iter.remain = count;
4834
4835 p = drm_coredump_printer(&iter);
4836
4837 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4838 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4839 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4840 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4841 if (adev->reset_task_info.pid)
4842 drm_printf(&p, "process_name: %s PID: %d\n",
4843 adev->reset_task_info.process_name,
4844 adev->reset_task_info.pid);
4845
4846 if (adev->reset_vram_lost)
4847 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4848 if (adev->num_regs) {
4849 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4850
4851 for (i = 0; i < adev->num_regs; i++)
4852 drm_printf(&p, "0x%08x: 0x%08x\n",
4853 adev->reset_dump_reg_list[i],
4854 adev->reset_dump_reg_value[i]);
4855 }
4856
4857 return count - iter.remain;
4858}
4859
4860static void amdgpu_devcoredump_free(void *data)
4861{
4862}
4863
4864static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4865{
4866 struct drm_device *dev = adev_to_drm(adev);
4867
4868 ktime_get_ts64(&adev->reset_time);
4869 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4870 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4871}
4872#endif
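/*
 * Illustrative only (standard devcoredump interface assumed, nothing amdgpu
 * specific): once amdgpu_reset_capture_coredumpm() has registered a dump,
 * the text produced by amdgpu_devcoredump_read() can typically be fetched
 * from userspace with
 *
 *	cat /sys/class/devcoredump/devcd<N>/data
 *
 * and the entry is released by writing anything back to the same file.
 */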
4873
04442bf7
LL
4874int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4875 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4876{
4877 struct amdgpu_device *tmp_adev = NULL;
04442bf7 4878 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 4879 int r = 0;
f5c7e779 4880 bool gpu_reset_for_dev_remove = 0;
26bc5340 4881
04442bf7
LL
4882 /* Try reset handler method first */
4883 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4884 reset_list);
15fd09a0 4885 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
4886
4887 reset_context->reset_device_list = device_list_handle;
04442bf7 4888 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b
LL
4889 /* If reset handler not implemented, continue; otherwise return */
4890 if (r == -ENOSYS)
4891 r = 0;
4892 else
04442bf7
LL
4893 return r;
4894
4895 /* Reset handler not implemented, use the default method */
4896 need_full_reset =
4897 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4898 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4899
f5c7e779
YC
4900 gpu_reset_for_dev_remove =
4901 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4902 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4903
26bc5340 4904 /*
655ce9cb 4905 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
4906 * to allow proper links negotiation in FW (within 1 sec)
4907 */
7ac71382 4908 if (!skip_hw_reset && need_full_reset) {
655ce9cb 4909 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 4910 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4911 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 4912 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 4913 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4914 r = -EALREADY;
4915 } else
4916 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4917
041a62bc 4918 if (r) {
aac89168 4919 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4920 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4921 break;
ce316fa5
LM
4922 }
4923 }
4924
041a62bc
AG
4925 /* For XGMI wait for all resets to complete before proceed */
4926 if (!r) {
655ce9cb 4927 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
4928 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4929 flush_work(&tmp_adev->xgmi_reset_work);
4930 r = tmp_adev->asic_reset_res;
4931 if (r)
4932 break;
ce316fa5
LM
4933 }
4934 }
4935 }
ce316fa5 4936 }
26bc5340 4937
43c4d576 4938 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 4939 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 4940 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4941 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4942 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
4943 }
4944
00eaa571 4945 amdgpu_ras_intr_cleared();
43c4d576 4946 }
00eaa571 4947
f5c7e779
YC
4948 /* Since the mode1 reset affects base ip blocks, the
4949 * phase1 ip blocks need to be resumed. Otherwise there
4950 * will be a BIOS signature error and the psp bootloader
4951 * can't load kdb on the next amdgpu install.
4952 */
4953 if (gpu_reset_for_dev_remove) {
4954 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4955 amdgpu_device_ip_resume_phase1(tmp_adev);
4956
4957 goto end;
4958 }
4959
655ce9cb 4960 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
4961 if (need_full_reset) {
4962 /* post card */
e3c1b071 4963 r = amdgpu_device_asic_init(tmp_adev);
4964 if (r) {
aac89168 4965 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 4966 } else {
26bc5340 4967 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1
JZ
4968 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4969 if (r)
4970 goto out;
4971
26bc5340
AG
4972 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4973 if (r)
4974 goto out;
4975
4976 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
4977#ifdef CONFIG_DEV_COREDUMP
4978 tmp_adev->reset_vram_lost = vram_lost;
4979 memset(&tmp_adev->reset_task_info, 0,
4980 sizeof(tmp_adev->reset_task_info));
4981 if (reset_context->job && reset_context->job->vm)
4982 tmp_adev->reset_task_info =
4983 reset_context->job->vm->task_info;
4984 amdgpu_reset_capture_coredumpm(tmp_adev);
4985#endif
26bc5340 4986 if (vram_lost) {
77e7f829 4987 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4988 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4989 }
4990
26bc5340
AG
4991 r = amdgpu_device_fw_loading(tmp_adev);
4992 if (r)
4993 return r;
4994
4995 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4996 if (r)
4997 goto out;
4998
4999 if (vram_lost)
5000 amdgpu_device_fill_reset_magic(tmp_adev);
5001
fdafb359
EQ
5002 /*
5003 * Add this ASIC as tracked as reset was already
5004 * complete successfully.
5005 */
5006 amdgpu_register_gpu_instance(tmp_adev);
5007
04442bf7
LL
5008 if (!reset_context->hive &&
5009 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5010 amdgpu_xgmi_add_device(tmp_adev);
5011
7c04ca50 5012 r = amdgpu_device_ip_late_init(tmp_adev);
5013 if (r)
5014 goto out;
5015
087451f3 5016 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5017
e8fbaf03
GC
5018 /*
5019 * The GPU enters a bad state once the number of
5020 * faulty pages detected by ECC reaches the threshold,
5021 * and RAS recovery is scheduled next. So add a check
5022 * here to break recovery if the bad page threshold
5023 * has indeed been exceeded, and remind the user to
5024 * retire this GPU or set a bigger
5025 * bad_page_threshold value before probing the
5026 * driver again.
5027 */
11003c68 5028 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5029 /* must succeed. */
5030 amdgpu_ras_resume(tmp_adev);
5031 } else {
5032 r = -EINVAL;
5033 goto out;
5034 }
e79a04d5 5035
26bc5340 5036 /* Update PSP FW topology after reset */
04442bf7
LL
5037 if (reset_context->hive &&
5038 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5039 r = amdgpu_xgmi_update_topology(
5040 reset_context->hive, tmp_adev);
26bc5340
AG
5041 }
5042 }
5043
26bc5340
AG
5044out:
5045 if (!r) {
5046 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5047 r = amdgpu_ib_ring_tests(tmp_adev);
5048 if (r) {
5049 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5050 need_full_reset = true;
5051 r = -EAGAIN;
5052 goto end;
5053 }
5054 }
5055
5056 if (!r)
5057 r = amdgpu_device_recover_vram(tmp_adev);
5058 else
5059 tmp_adev->asic_reset_res = r;
5060 }
5061
5062end:
04442bf7
LL
5063 if (need_full_reset)
5064 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5065 else
5066 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5067 return r;
5068}
5069
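/*
 * Illustrative sketch (not part of the driver): the parallel-reset pattern
 * used for XGMI hives in amdgpu_do_asic_reset() above, reduced to its
 * essentials.  "struct demo_node" and "demo_parallel_reset" are hypothetical
 * stand-ins for amdgpu_device, its xgmi_reset_work item and asic_reset_res.
 */
struct demo_node {
	struct list_head reset_list;
	struct work_struct reset_work;	/* queued reset, runs the ASIC reset */
	int reset_res;			/* filled in by the work handler */
};

static __maybe_unused int demo_parallel_reset(struct list_head *nodes)
{
	struct demo_node *n;
	int r = 0;

	/* Kick off every node's reset on the unbound workqueue. */
	list_for_each_entry(n, nodes, reset_list)
		if (!queue_work(system_unbound_wq, &n->reset_work))
			return -EALREADY;	/* a reset is already pending */

	/* Wait for all of them and report the first failure. */
	list_for_each_entry(n, nodes, reset_list) {
		flush_work(&n->reset_work);
		if (n->reset_res && !r)
			r = n->reset_res;
	}

	return r;
}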
e923be99 5070static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5071{
5740682e 5072
a3a09142
AD
5073 switch (amdgpu_asic_reset_method(adev)) {
5074 case AMD_RESET_METHOD_MODE1:
5075 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5076 break;
5077 case AMD_RESET_METHOD_MODE2:
5078 adev->mp1_state = PP_MP1_STATE_RESET;
5079 break;
5080 default:
5081 adev->mp1_state = PP_MP1_STATE_NONE;
5082 break;
5083 }
26bc5340 5084}
d38ceaf9 5085
e923be99 5086static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5087{
89041940 5088 amdgpu_vf_error_trans_all(adev);
a3a09142 5089 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5090}
5091
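/*
 * Illustrative usage sketch (not part of the driver): the two helpers above
 * are meant to bracket a reset, putting MP1 into the method-specific state
 * beforehand and restoring PP_MP1_STATE_NONE afterwards.
 * "demo_bracketed_reset" is a hypothetical caller.
 */
static __maybe_unused int demo_bracketed_reset(struct amdgpu_device *adev)
{
	int r;

	amdgpu_device_set_mp1_state(adev);
	r = amdgpu_asic_reset(adev);
	amdgpu_device_unset_mp1_state(adev);

	return r;
}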
3f12acc8
EQ
5092static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5093{
5094 struct pci_dev *p = NULL;
5095
5096 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5097 adev->pdev->bus->number, 1);
5098 if (p) {
5099 pm_runtime_enable(&(p->dev));
5100 pm_runtime_resume(&(p->dev));
5101 }
b85e285e
YY
5102
5103 pci_dev_put(p);
3f12acc8
EQ
5104}
5105
5106static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5107{
5108 enum amd_reset_method reset_method;
5109 struct pci_dev *p = NULL;
5110 u64 expires;
5111
5112 /*
5113 * For now, only BACO and mode1 reset are confirmed to suffer
5114 * from the audio issue if the audio device is not properly suspended.
5115 */
5116 reset_method = amdgpu_asic_reset_method(adev);
5117 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5118 (reset_method != AMD_RESET_METHOD_MODE1))
5119 return -EINVAL;
5120
5121 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5122 adev->pdev->bus->number, 1);
5123 if (!p)
5124 return -ENODEV;
5125
5126 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5127 if (!expires)
5128 /*
5129 * If we cannot get the audio device autosuspend delay,
5130 * a fixed 4S interval will be used. Since 3S is the
5131 * audio controller's default autosuspend delay, the 4S
5132 * used here is guaranteed to cover it.
5133 */
54b7feb9 5134 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5135
5136 while (!pm_runtime_status_suspended(&(p->dev))) {
5137 if (!pm_runtime_suspend(&(p->dev)))
5138 break;
5139
5140 if (expires < ktime_get_mono_fast_ns()) {
5141 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5142 pci_dev_put(p);
3f12acc8
EQ
5143 /* TODO: abort the succeeding gpu reset? */
5144 return -ETIMEDOUT;
5145 }
5146 }
5147
5148 pm_runtime_disable(&(p->dev));
5149
b85e285e 5150 pci_dev_put(p);
3f12acc8
EQ
5151 return 0;
5152}
5153
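/*
 * Illustrative sketch (not part of the driver): the bounded-wait pattern used
 * in amdgpu_device_suspend_display_audio() above, shown in isolation.  It
 * tries to runtime-suspend a device and polls until the device reports
 * suspended or a deadline expires; the 4 second fallback mirrors the comment
 * above.  "demo_suspend_with_deadline" is a hypothetical helper.
 */
static __maybe_unused int demo_suspend_with_deadline(struct device *dev)
{
	u64 expires = pm_runtime_autosuspend_expiration(dev);

	if (!expires)
		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;

	while (!pm_runtime_status_suspended(dev)) {
		if (!pm_runtime_suspend(dev))
			break;			/* suspend request was accepted */

		if (expires < ktime_get_mono_fast_ns())
			return -ETIMEDOUT;	/* give up after the deadline */
	}

	return 0;
}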
d193b12b 5154static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5155{
5156 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5157
5158#if defined(CONFIG_DEBUG_FS)
5159 if (!amdgpu_sriov_vf(adev))
5160 cancel_work(&adev->reset_work);
5161#endif
5162
5163 if (adev->kfd.dev)
5164 cancel_work(&adev->kfd.reset_work);
5165
5166 if (amdgpu_sriov_vf(adev))
5167 cancel_work(&adev->virt.flr_work);
5168
5169 if (con && adev->ras_enabled)
5170 cancel_work(&con->recovery_work);
5171
5172}
5173
26bc5340 5174/**
6e9c65f7 5175 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5176 *
982a820b 5177 * @adev: amdgpu_device pointer
26bc5340 5178 * @job: the job that triggered the hang
80bd2de1 5179 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5180 *
5181 * Attempt to reset the GPU if it has hung (all ASICs).
5182 * Attempt a soft reset or a full reset and reinitialize the ASIC.
5183 * Returns 0 for success or an error on failure.
5184 */
5185
cf727044 5186int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5187 struct amdgpu_job *job,
5188 struct amdgpu_reset_context *reset_context)
26bc5340 5189{
1d721ed6 5190 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5191 bool job_signaled = false;
26bc5340 5192 struct amdgpu_hive_info *hive = NULL;
26bc5340 5193 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5194 int i, r = 0;
bb5c7235 5195 bool need_emergency_restart = false;
3f12acc8 5196 bool audio_suspended = false;
f5c7e779
YC
5197 bool gpu_reset_for_dev_remove = false;
5198
5199 gpu_reset_for_dev_remove =
5200 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5201 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5202
6e3cd2a9 5203 /*
bb5c7235
WS
5204 * Special case: RAS triggered and full reset isn't supported
5205 */
5206 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5207
d5ea093e
AG
5208 /*
5209 * Flush RAM to disk so that after reboot
5210 * the user can read the log and see why the system rebooted.
5211 */
bb5c7235 5212 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5213 DRM_WARN("Emergency reboot.");
5214
5215 ksys_sync_helper();
5216 emergency_restart();
5217 }
5218
b823821f 5219 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5220 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5221
175ac6ec
ZL
5222 if (!amdgpu_sriov_vf(adev))
5223 hive = amdgpu_get_xgmi_hive(adev);
681260df 5224 if (hive)
53b3f8f4 5225 mutex_lock(&hive->hive_lock);
26bc5340 5226
f1549c09
LG
5227 reset_context->job = job;
5228 reset_context->hive = hive;
9e94d22c
EQ
5229 /*
5230 * Build list of devices to reset.
5231 * In case we are in XGMI hive mode, re-sort the device list
5232 * to put adev in the first position.
5233 */
5234 INIT_LIST_HEAD(&device_list);
175ac6ec 5235 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5236 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5237 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5238 if (gpu_reset_for_dev_remove && adev->shutdown)
5239 tmp_adev->shutdown = true;
5240 }
655ce9cb 5241 if (!list_is_first(&adev->reset_list, &device_list))
5242 list_rotate_to_front(&adev->reset_list, &device_list);
5243 device_list_handle = &device_list;
26bc5340 5244 } else {
655ce9cb 5245 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5246 device_list_handle = &device_list;
5247 }
5248
e923be99
AG
5249 /* We need to lock reset domain only once both for XGMI and single device */
5250 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5251 reset_list);
3675c2f2 5252 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5253
1d721ed6 5254 /* block all schedulers and reset given job's ring */
655ce9cb 5255 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5256
e923be99 5257 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5258
3f12acc8
EQ
5259 /*
5260 * Try to put the audio codec into suspend state
5261 * before the GPU reset starts.
5262 *
5263 * The power domain of the graphics device is shared
5264 * with the AZ (audio) power domain. Without this,
5265 * we may change the audio hardware state behind the
5266 * audio driver's back, which will trigger audio
5267 * codec errors.
5268 */
5269 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5270 audio_suspended = true;
5271
9e94d22c
EQ
5272 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5273
52fb44cf
EQ
5274 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5275
c004d44e 5276 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5277 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5278
12ffa55d
AG
5279 /*
5280 * Mark these ASICs as untracked first before the reset,
5281 * and add them back after the reset has completed.
5282 */
5283 amdgpu_unregister_gpu_instance(tmp_adev);
5284
163d4cd2 5285 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5286
f1c1314b 5287 /* disable ras on ALL IPs */
bb5c7235 5288 if (!need_emergency_restart &&
b823821f 5289 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5290 amdgpu_ras_suspend(tmp_adev);
5291
1d721ed6
AG
5292 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5293 struct amdgpu_ring *ring = tmp_adev->rings[i];
5294
5295 if (!ring || !ring->sched.thread)
5296 continue;
5297
0b2d2c2e 5298 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5299
bb5c7235 5300 if (need_emergency_restart)
7c6e68c7 5301 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5302 }
8f8c80f4 5303 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5304 }
5305
bb5c7235 5306 if (need_emergency_restart)
7c6e68c7
AG
5307 goto skip_sched_resume;
5308
1d721ed6
AG
5309 /*
5310 * Must check whether the guilty job has signaled here, since after
5311 * this point all old HW fences are force-signaled.
5312 *
5313 * job->base holds a reference to parent fence
5314 */
f6a3f660 5315 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5316 job_signaled = true;
1d721ed6
AG
5317 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5318 goto skip_hw_reset;
5319 }
5320
26bc5340 5321retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5322 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5323 if (gpu_reset_for_dev_remove) {
5324 /* Workaround for ASICs that need to disable SMC first */
5325 amdgpu_device_smu_fini_early(tmp_adev);
5326 }
f1549c09 5327 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5328 /* TODO: Should we stop? */
5329 if (r) {
aac89168 5330 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5331 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5332 tmp_adev->asic_reset_res = r;
5333 }
247c7b0d
AG
5334
5335 /*
5336 * Drop all pending non-scheduler resets. Scheduler resets
5337 * were already dropped during drm_sched_stop.
5338 */
d193b12b 5339 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5340 }
5341
5342 /* Actual ASIC resets if needed. */
4f30d920 5343 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5344 if (amdgpu_sriov_vf(adev)) {
5345 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5346 if (r)
5347 adev->asic_reset_res = r;
950d6425
SY
5348
5349 /* Aldebaran supports RAS in SRIOV, so we need to resume RAS during reset */
5350 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
5351 amdgpu_ras_resume(adev);
26bc5340 5352 } else {
f1549c09 5353 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5354 if (r && r == -EAGAIN)
26bc5340 5355 goto retry;
f5c7e779
YC
5356
5357 if (!r && gpu_reset_for_dev_remove)
5358 goto recover_end;
26bc5340
AG
5359 }
5360
1d721ed6
AG
5361skip_hw_reset:
5362
26bc5340 5363 /* Post ASIC reset for all devs. */
655ce9cb 5364 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5365
1d721ed6
AG
5366 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5367 struct amdgpu_ring *ring = tmp_adev->rings[i];
5368
5369 if (!ring || !ring->sched.thread)
5370 continue;
5371
6868a2c4 5372 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5373 }
5374
693073a0 5375 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
ed67f729
JX
5376 amdgpu_mes_self_test(tmp_adev);
5377
1053b9c9 5378 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
4a580877 5379 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
5380 }
5381
7258fa31
SK
5382 if (tmp_adev->asic_reset_res)
5383 r = tmp_adev->asic_reset_res;
5384
1d721ed6 5385 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5386
5387 if (r) {
5388 /* bad news, how to tell it to userspace ? */
12ffa55d 5389 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5390 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5391 } else {
12ffa55d 5392 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5393 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5394 DRM_WARN("smart shift update failed\n");
26bc5340 5395 }
7c6e68c7 5396 }
26bc5340 5397
7c6e68c7 5398skip_sched_resume:
655ce9cb 5399 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5400 /* unlock kfd: SRIOV would do it separately */
c004d44e 5401 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5402 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5403
5404 /* kfd_post_reset will do nothing if the kfd device is not initialized,
5405 * so bring up kfd here if it was not initialized before.
5406 */
5407 if (!adev->kfd.init_complete)
5408 amdgpu_amdkfd_device_init(adev);
5409
3f12acc8
EQ
5410 if (audio_suspended)
5411 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5412
5413 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5414
5415 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5416 }
5417
f5c7e779 5418recover_end:
e923be99
AG
5419 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5420 reset_list);
5421 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5422
9e94d22c 5423 if (hive) {
9e94d22c 5424 mutex_unlock(&hive->hive_lock);
d95e8e97 5425 amdgpu_put_xgmi_hive(hive);
9e94d22c 5426 }
26bc5340 5427
f287a3c5 5428 if (r)
26bc5340 5429 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5430
5431 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5432 return r;
5433}
5434
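/*
 * Illustrative sketch (not part of the driver): how amdgpu_device_gpu_recover()
 * above builds its reset list for an XGMI hive.  Every hive member is
 * appended, then the list is rotated so the device that triggered recovery is
 * handled first.  "demo_build_reset_list" is a hypothetical helper that reuses
 * the real reset_list member of amdgpu_device.
 */
static __maybe_unused void demo_build_reset_list(struct amdgpu_hive_info *hive,
						 struct amdgpu_device *adev,
						 struct list_head *device_list)
{
	struct amdgpu_device *tmp_adev;

	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
		list_add_tail(&tmp_adev->reset_list, device_list);

	/* Put the device that hung at the head so it is reset first. */
	if (!list_is_first(&adev->reset_list, device_list))
		list_rotate_to_front(&adev->reset_list, device_list);
}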
e3ecdffa
AD
5435/**
5436 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
5437 *
5438 * @adev: amdgpu_device pointer
5439 *
5440 * Fetches and stores in the driver the PCIe capabilities (gen speed
5441 * and lanes) of the slot the device is in. Handles APUs and
5442 * virtualized environments where PCIE config space may not be available.
5443 */
5494d864 5444static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5445{
5d9a6330 5446 struct pci_dev *pdev;
c5313457
HK
5447 enum pci_bus_speed speed_cap, platform_speed_cap;
5448 enum pcie_link_width platform_link_width;
d0dd7f0c 5449
cd474ba0
AD
5450 if (amdgpu_pcie_gen_cap)
5451 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5452
cd474ba0
AD
5453 if (amdgpu_pcie_lane_cap)
5454 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5455
cd474ba0
AD
5456 /* covers APUs as well */
5457 if (pci_is_root_bus(adev->pdev->bus)) {
5458 if (adev->pm.pcie_gen_mask == 0)
5459 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5460 if (adev->pm.pcie_mlw_mask == 0)
5461 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5462 return;
cd474ba0 5463 }
d0dd7f0c 5464
c5313457
HK
5465 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5466 return;
5467
dbaa922b
AD
5468 pcie_bandwidth_available(adev->pdev, NULL,
5469 &platform_speed_cap, &platform_link_width);
c5313457 5470
cd474ba0 5471 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5472 /* asic caps */
5473 pdev = adev->pdev;
5474 speed_cap = pcie_get_speed_cap(pdev);
5475 if (speed_cap == PCI_SPEED_UNKNOWN) {
5476 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5477 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5478 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5479 } else {
2b3a1f51
FX
5480 if (speed_cap == PCIE_SPEED_32_0GT)
5481 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5482 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5483 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5484 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5485 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5486 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5487 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5488 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5489 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5490 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5491 else if (speed_cap == PCIE_SPEED_8_0GT)
5492 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5493 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5494 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5495 else if (speed_cap == PCIE_SPEED_5_0GT)
5496 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5497 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5498 else
5499 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5500 }
5501 /* platform caps */
c5313457 5502 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5503 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5504 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5505 } else {
2b3a1f51
FX
5506 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5507 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5508 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5509 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5510 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5511 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5512 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5513 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5514 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5515 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5516 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5517 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5518 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5519 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5520 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5521 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5522 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5523 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5524 else
5525 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5526
cd474ba0
AD
5527 }
5528 }
5529 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5530 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5531 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5532 } else {
c5313457 5533 switch (platform_link_width) {
5d9a6330 5534 case PCIE_LNK_X32:
cd474ba0
AD
5535 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5536 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5537 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5538 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5539 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5540 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5541 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5542 break;
5d9a6330 5543 case PCIE_LNK_X16:
cd474ba0
AD
5544 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5545 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5546 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5547 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5548 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5549 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5550 break;
5d9a6330 5551 case PCIE_LNK_X12:
cd474ba0
AD
5552 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5553 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5554 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5555 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5556 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5557 break;
5d9a6330 5558 case PCIE_LNK_X8:
cd474ba0
AD
5559 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5560 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5561 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5562 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5563 break;
5d9a6330 5564 case PCIE_LNK_X4:
cd474ba0
AD
5565 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5566 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5567 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5568 break;
5d9a6330 5569 case PCIE_LNK_X2:
cd474ba0
AD
5570 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5571 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5572 break;
5d9a6330 5573 case PCIE_LNK_X1:
cd474ba0
AD
5574 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5575 break;
5576 default:
5577 break;
5578 }
d0dd7f0c
AD
5579 }
5580 }
5581}
d38ceaf9 5582
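/*
 * Illustrative helper (not part of the driver) condensing the speed-cap
 * decoding above: it maps a pci_bus_speed value to the highest PCIe
 * generation it represents.  "demo_speed_to_gen" is a hypothetical name, and
 * unknown speeds fall back to gen1 here for simplicity (the real code above
 * uses broader fallback masks for the unknown case).
 */
static __maybe_unused unsigned int demo_speed_to_gen(enum pci_bus_speed speed)
{
	switch (speed) {
	case PCIE_SPEED_32_0GT:
		return 5;
	case PCIE_SPEED_16_0GT:
		return 4;
	case PCIE_SPEED_8_0GT:
		return 3;
	case PCIE_SPEED_5_0GT:
		return 2;
	default:
		return 1;	/* 2.5GT/s or unknown */
	}
}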
08a2fd23
RE
5583/**
5584 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5585 *
5586 * @adev: amdgpu_device pointer
5587 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5588 *
5589 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5590 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5591 * @peer_adev.
5592 */
5593bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5594 struct amdgpu_device *peer_adev)
5595{
5596#ifdef CONFIG_HSA_AMD_P2P
5597 uint64_t address_mask = peer_adev->dev->dma_mask ?
5598 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5599 resource_size_t aper_limit =
5600 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5601 bool p2p_access =
5602 !adev->gmc.xgmi.connected_to_cpu &&
5603 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5604
5605 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5606 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5607 !(adev->gmc.aper_base & address_mask ||
5608 aper_limit & address_mask));
5609#else
5610 return false;
5611#endif
5612}
5613
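/*
 * Illustrative sketch (not part of the driver) of the address check performed
 * above: peer access is only possible when the whole visible BAR lies below
 * the peer's DMA mask.  "demo_bar_within_dma_mask" is hypothetical.
 */
static __maybe_unused bool demo_bar_within_dma_mask(u64 bar_base, u64 bar_size,
						    u64 dma_mask)
{
	u64 bar_limit = bar_base + bar_size - 1;

	return !(bar_base & ~dma_mask) && !(bar_limit & ~dma_mask);
}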
361dbd01
AD
5614int amdgpu_device_baco_enter(struct drm_device *dev)
5615{
1348969a 5616 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5617 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5618
6ab68650 5619 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5620 return -ENOTSUPP;
5621
8ab0d6f0 5622 if (ras && adev->ras_enabled &&
acdae216 5623 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5624 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5625
9530273e 5626 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5627}
5628
5629int amdgpu_device_baco_exit(struct drm_device *dev)
5630{
1348969a 5631 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5632 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5633 int ret = 0;
361dbd01 5634
6ab68650 5635 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5636 return -ENOTSUPP;
5637
9530273e
EQ
5638 ret = amdgpu_dpm_baco_exit(adev);
5639 if (ret)
5640 return ret;
7a22677b 5641
8ab0d6f0 5642 if (ras && adev->ras_enabled &&
acdae216 5643 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5644 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5645
1bece222
CL
5646 if (amdgpu_passthrough(adev) &&
5647 adev->nbio.funcs->clear_doorbell_interrupt)
5648 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5649
7a22677b 5650 return 0;
361dbd01 5651}
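/*
 * Illustrative usage sketch (not part of the driver): BACO entry and exit are
 * paired, typically around runtime power management; this only shows the
 * calling pattern of the two helpers above.  "demo_baco_cycle" is a
 * hypothetical caller.
 */
static __maybe_unused int demo_baco_cycle(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;

	/* ... the device stays in BACO here, e.g. while runtime suspended ... */

	return amdgpu_device_baco_exit(dev);
}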
c9a6b82f
AG
5652
5653/**
5654 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5655 * @pdev: PCI device struct
5656 * @state: PCI channel state
5657 *
5658 * Description: Called when a PCI error is detected.
5659 *
5660 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5661 */
5662pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5663{
5664 struct drm_device *dev = pci_get_drvdata(pdev);
5665 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5666 int i;
c9a6b82f
AG
5667
5668 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5669
6894305c
AG
5670 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5671 DRM_WARN("No support for XGMI hive yet...");
5672 return PCI_ERS_RESULT_DISCONNECT;
5673 }
5674
e17e27f9
GC
5675 adev->pci_channel_state = state;
5676
c9a6b82f
AG
5677 switch (state) {
5678 case pci_channel_io_normal:
5679 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5680 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5681 case pci_channel_io_frozen:
5682 /*
d0fb18b5 5683 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5684 * to GPU during PCI error recovery
5685 */
3675c2f2 5686 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5687 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5688
5689 /*
5690 * Block any work scheduling as we do for regular GPU reset
5691 * for the duration of the recovery
5692 */
5693 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5694 struct amdgpu_ring *ring = adev->rings[i];
5695
5696 if (!ring || !ring->sched.thread)
5697 continue;
5698
5699 drm_sched_stop(&ring->sched, NULL);
5700 }
8f8c80f4 5701 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5702 return PCI_ERS_RESULT_NEED_RESET;
5703 case pci_channel_io_perm_failure:
5704 /* Permanent error, prepare for device removal */
5705 return PCI_ERS_RESULT_DISCONNECT;
5706 }
5707
5708 return PCI_ERS_RESULT_NEED_RESET;
5709}
5710
5711/**
5712 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5713 * @pdev: pointer to PCI device
5714 */
5715pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5716{
5717
5718 DRM_INFO("PCI error: mmio enabled callback!!\n");
5719
5720 /* TODO - dump whatever for debugging purposes */
5721
5722 /* This is called only if amdgpu_pci_error_detected returns
5723 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5724 * works, no need to reset slot.
5725 */
5726
5727 return PCI_ERS_RESULT_RECOVERED;
5728}
5729
5730/**
5731 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5732 * @pdev: PCI device struct
5733 *
5734 * Description: This routine is called by the pci error recovery
5735 * code after the PCI slot has been reset, just before we
5736 * should resume normal operations.
5737 */
5738pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5739{
5740 struct drm_device *dev = pci_get_drvdata(pdev);
5741 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5742 int r, i;
04442bf7 5743 struct amdgpu_reset_context reset_context;
362c7b91 5744 u32 memsize;
7ac71382 5745 struct list_head device_list;
c9a6b82f
AG
5746
5747 DRM_INFO("PCI error: slot reset callback!!\n");
5748
04442bf7
LL
5749 memset(&reset_context, 0, sizeof(reset_context));
5750
7ac71382 5751 INIT_LIST_HEAD(&device_list);
655ce9cb 5752 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5753
362c7b91
AG
5754 /* wait for asic to come out of reset */
5755 msleep(500);
5756
7ac71382 5757 /* Restore PCI confspace */
c1dd4aa6 5758 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5759
362c7b91
AG
5760 /* confirm ASIC came out of reset */
5761 for (i = 0; i < adev->usec_timeout; i++) {
5762 memsize = amdgpu_asic_get_config_memsize(adev);
5763
5764 if (memsize != 0xffffffff)
5765 break;
5766 udelay(1);
5767 }
5768 if (memsize == 0xffffffff) {
5769 r = -ETIME;
5770 goto out;
5771 }
5772
04442bf7
LL
5773 reset_context.method = AMD_RESET_METHOD_NONE;
5774 reset_context.reset_req_dev = adev;
5775 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5776 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5777
7afefb81 5778 adev->no_hw_access = true;
04442bf7 5779 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5780 adev->no_hw_access = false;
c9a6b82f
AG
5781 if (r)
5782 goto out;
5783
04442bf7 5784 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5785
5786out:
c9a6b82f 5787 if (!r) {
c1dd4aa6
AG
5788 if (amdgpu_device_cache_pci_state(adev->pdev))
5789 pci_restore_state(adev->pdev);
5790
c9a6b82f
AG
5791 DRM_INFO("PCIe error recovery succeeded\n");
5792 } else {
5793 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5794 amdgpu_device_unset_mp1_state(adev);
5795 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5796 }
5797
5798 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5799}
5800
5801/**
5802 * amdgpu_pci_resume() - resume normal ops after PCI reset
5803 * @pdev: pointer to PCI device
5804 *
5805 * Called when the error recovery driver tells us that it's
505199a3 5806 * OK to resume normal operation.
c9a6b82f
AG
5807 */
5808void amdgpu_pci_resume(struct pci_dev *pdev)
5809{
5810 struct drm_device *dev = pci_get_drvdata(pdev);
5811 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5812 int i;
c9a6b82f 5813
c9a6b82f
AG
5814
5815 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5816
e17e27f9
GC
5817 /* Only continue execution for the case of pci_channel_io_frozen */
5818 if (adev->pci_channel_state != pci_channel_io_frozen)
5819 return;
5820
acd89fca
AG
5821 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5822 struct amdgpu_ring *ring = adev->rings[i];
5823
5824 if (!ring || !ring->sched.thread)
5825 continue;
5826
acd89fca
AG
5827 drm_sched_start(&ring->sched, true);
5828 }
5829
e923be99
AG
5830 amdgpu_device_unset_mp1_state(adev);
5831 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5832}
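/*
 * Illustrative sketch (not part of this file) of how the four PCI error
 * callbacks above are typically wired together through a
 * struct pci_error_handlers attached to the driver's struct pci_driver.
 * The table name below is hypothetical; in the real driver the equivalent
 * table lives alongside the pci_driver definition in amdgpu_drv.c.
 */
static const struct pci_error_handlers demo_pci_err_handlers __maybe_unused = {
	.error_detected	= amdgpu_pci_error_detected,
	.mmio_enabled	= amdgpu_pci_mmio_enabled,
	.slot_reset	= amdgpu_pci_slot_reset,
	.resume		= amdgpu_pci_resume,
};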
c1dd4aa6
AG
5833
5834bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5835{
5836 struct drm_device *dev = pci_get_drvdata(pdev);
5837 struct amdgpu_device *adev = drm_to_adev(dev);
5838 int r;
5839
5840 r = pci_save_state(pdev);
5841 if (!r) {
5842 kfree(adev->pci_state);
5843
5844 adev->pci_state = pci_store_saved_state(pdev);
5845
5846 if (!adev->pci_state) {
5847 DRM_ERROR("Failed to store PCI saved state");
5848 return false;
5849 }
5850 } else {
5851 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5852 return false;
5853 }
5854
5855 return true;
5856}
5857
5858bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5859{
5860 struct drm_device *dev = pci_get_drvdata(pdev);
5861 struct amdgpu_device *adev = drm_to_adev(dev);
5862 int r;
5863
5864 if (!adev->pci_state)
5865 return false;
5866
5867 r = pci_load_saved_state(pdev, adev->pci_state);
5868
5869 if (!r) {
5870 pci_restore_state(pdev);
5871 } else {
5872 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5873 return false;
5874 }
5875
5876 return true;
5877}
5878
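/*
 * Illustrative usage sketch (not part of the driver): the two helpers above
 * pair up around a reset, caching the PCI config space before the ASIC goes
 * down and restoring it once it is back.  "demo_pci_state_round_trip" is
 * hypothetical and the reset step is elided.
 */
static __maybe_unused void demo_pci_state_round_trip(struct amdgpu_device *adev)
{
	if (!amdgpu_device_cache_pci_state(adev->pdev))
		return;

	/* ... reset or power transition happens here ... */

	amdgpu_device_load_pci_state(adev->pdev);
}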
810085dd
EH
5879void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5880 struct amdgpu_ring *ring)
5881{
5882#ifdef CONFIG_X86_64
b818a5d3 5883 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5884 return;
5885#endif
5886 if (adev->gmc.xgmi.connected_to_cpu)
5887 return;
5888
5889 if (ring && ring->funcs->emit_hdp_flush)
5890 amdgpu_ring_emit_hdp_flush(ring);
5891 else
5892 amdgpu_asic_flush_hdp(adev, ring);
5893}
c1dd4aa6 5894
810085dd
EH
5895void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5896 struct amdgpu_ring *ring)
5897{
5898#ifdef CONFIG_X86_64
b818a5d3 5899 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5900 return;
5901#endif
5902 if (adev->gmc.xgmi.connected_to_cpu)
5903 return;
c1dd4aa6 5904
810085dd
EH
5905 amdgpu_asic_invalidate_hdp(adev, ring);
5906}
34f3a4a9 5907
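/*
 * Illustrative usage sketch (not part of the driver, semantics hedged): the
 * flush helper above is generally used after the CPU has written data the GPU
 * is about to consume, and the invalidate helper before the CPU reads data
 * the GPU has just produced; passing a NULL ring selects the register-based
 * path.  "demo_publish_to_gpu" is hypothetical and the buffer write is elided.
 */
static __maybe_unused void demo_publish_to_gpu(struct amdgpu_device *adev)
{
	/* ... CPU writes into a GPU-visible buffer would happen here ... */

	amdgpu_device_flush_hdp(adev, NULL);	/* NULL ring: register flush path */
}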
89a7a870
AG
5908int amdgpu_in_reset(struct amdgpu_device *adev)
5909{
5910 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
5911}
5912
34f3a4a9
LY
5913/**
5914 * amdgpu_device_halt() - bring hardware to some kind of halt state
5915 *
5916 * @adev: amdgpu_device pointer
5917 *
5918 * Bring the hardware to some kind of halt state so that no one can touch it
5919 * any more. It helps to preserve the error context when an error occurs.
5920 * Compared to a simple hang, the system will remain stable at least for SSH
5921 * access. Then it should be trivial to inspect the hardware state and
5922 * see what's going on. Implemented as follows:
5923 *
5924 * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc),
5925 * clears all CPU mappings to device, disallows remappings through page faults
5926 * 2. amdgpu_irq_disable_all() disables all interrupts
5927 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
5928 * 4. set adev->no_hw_access to avoid potential crashes after step 5
5929 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5930 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5931 * flush any in-flight DMA operations
5932 */
5933void amdgpu_device_halt(struct amdgpu_device *adev)
5934{
5935 struct pci_dev *pdev = adev->pdev;
e0f943b4 5936 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9
LY
5937
5938 drm_dev_unplug(ddev);
5939
5940 amdgpu_irq_disable_all(adev);
5941
5942 amdgpu_fence_driver_hw_fini(adev);
5943
5944 adev->no_hw_access = true;
5945
5946 amdgpu_device_unmap_mmio(adev);
5947
5948 pci_disable_device(pdev);
5949 pci_wait_for_pending_transaction(pdev);
5950}
86700a40
XD
5951
5952u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5953 u32 reg)
5954{
5955 unsigned long flags, address, data;
5956 u32 r;
5957
5958 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5959 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5960
5961 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5962 WREG32(address, reg * 4);
5963 (void)RREG32(address);
5964 r = RREG32(data);
5965 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5966 return r;
5967}
5968
5969void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5970 u32 reg, u32 v)
5971{
5972 unsigned long flags, address, data;
5973
5974 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5975 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5976
5977 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5978 WREG32(address, reg * 4);
5979 (void)RREG32(address);
5980 WREG32(data, v);
5981 (void)RREG32(data);
5982 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5983}
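/*
 * Illustrative usage sketch (not part of the driver): the typical
 * read-modify-write pattern on top of the indirect PCIe port accessors above.
 * "demo_pcie_port_rmw" and its mask parameters are hypothetical.
 */
static __maybe_unused void demo_pcie_port_rmw(struct amdgpu_device *adev,
					      u32 reg, u32 clear, u32 set)
{
	u32 v = amdgpu_device_pcie_port_rreg(adev, reg);

	v &= ~clear;
	v |= set;

	amdgpu_device_pcie_port_wreg(adev, reg, v);
}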
68ce8b24
CK
5984
5985/**
5986 * amdgpu_device_switch_gang - switch to a new gang
5987 * @adev: amdgpu_device pointer
5988 * @gang: the gang to switch to
5989 *
5990 * Try to switch to a new gang.
5991 * Returns: NULL if we switched to the new gang or a reference to the current
5992 * gang leader.
5993 */
5994struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
5995 struct dma_fence *gang)
5996{
5997 struct dma_fence *old = NULL;
5998
5999 do {
6000 dma_fence_put(old);
6001 rcu_read_lock();
6002 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6003 rcu_read_unlock();
6004
6005 if (old == gang)
6006 break;
6007
6008 if (!dma_fence_is_signaled(old))
6009 return old;
6010
6011 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6012 old, gang) != old);
6013
6014 dma_fence_put(old);
6015 return NULL;
6016}
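/*
 * Illustrative usage sketch (not part of the driver) of how a submission path
 * might use amdgpu_device_switch_gang() above: if the switch fails, the
 * returned fence is the still-running gang leader that must be waited on
 * before retrying.  "demo_switch_or_wait" is hypothetical.
 */
static __maybe_unused int demo_switch_or_wait(struct amdgpu_device *adev,
					      struct dma_fence *new_gang)
{
	struct dma_fence *old = amdgpu_device_switch_gang(adev, new_gang);
	long r;

	if (!old)
		return 0;	/* new_gang is now the gang leader */

	/* Previous gang still running: wait for it, then let the caller retry. */
	r = dma_fence_wait(old, false);
	dma_fence_put(old);

	return r < 0 ? r : -EAGAIN;
}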
220c8cc8
AD
6017
6018bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6019{
6020 switch (adev->asic_type) {
6021#ifdef CONFIG_DRM_AMDGPU_SI
6022 case CHIP_HAINAN:
6023#endif
6024 case CHIP_TOPAZ:
6025 /* chips with no display hardware */
6026 return false;
6027#ifdef CONFIG_DRM_AMDGPU_SI
6028 case CHIP_TAHITI:
6029 case CHIP_PITCAIRN:
6030 case CHIP_VERDE:
6031 case CHIP_OLAND:
6032#endif
6033#ifdef CONFIG_DRM_AMDGPU_CIK
6034 case CHIP_BONAIRE:
6035 case CHIP_HAWAII:
6036 case CHIP_KAVERI:
6037 case CHIP_KABINI:
6038 case CHIP_MULLINS:
6039#endif
6040 case CHIP_TONGA:
6041 case CHIP_FIJI:
6042 case CHIP_POLARIS10:
6043 case CHIP_POLARIS11:
6044 case CHIP_POLARIS12:
6045 case CHIP_VEGAM:
6046 case CHIP_CARRIZO:
6047 case CHIP_STONEY:
6048 /* chips with display hardware */
6049 return true;
6050 default:
6051 /* IP discovery */
6052 if (!adev->ip_versions[DCE_HWIP][0] ||
6053 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6054 return false;
6055 return true;
6056 }
6057}