drm/amdgpu: Increase Max GPU instance to 64
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/devcoredump.h>
#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>
#include <linux/apple-gmux.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>

#if IS_ENABLED(CONFIG_X86)
#include <asm/intel-family.h>
#endif

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)

static const struct drm_driver amdgpu_kms_driver;

const char *amdgpu_asic_name[] = {
        "TAHITI",
        "PITCAIRN",
        "VERDE",
        "OLAND",
        "HAINAN",
        "BONAIRE",
        "KAVERI",
        "KABINI",
        "HAWAII",
        "MULLINS",
        "TOPAZ",
        "TONGA",
        "FIJI",
        "CARRIZO",
        "STONEY",
        "POLARIS10",
        "POLARIS11",
        "POLARIS12",
        "VEGAM",
        "VEGA10",
        "VEGA12",
        "VEGA20",
        "RAVEN",
        "ARCTURUS",
        "RENOIR",
        "ALDEBARAN",
        "NAVI10",
        "CYAN_SKILLFISH",
        "NAVI14",
        "NAVI12",
        "SIENNA_CICHLID",
        "NAVY_FLOUNDER",
        "VANGOGH",
        "DIMGREY_CAVEFISH",
        "BEIGE_GOBY",
        "YELLOW_CARP",
        "IP DISCOVERY",
        "LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);
        uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

        return sysfs_emit(buf, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
                amdgpu_device_get_pcie_replay_count, NULL);

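/*
 * Illustrative usage (not part of the driver): the attribute above is a
 * plain sysfs file, so the counter can be read from userspace with e.g.
 *
 *   cat /sys/bus/pci/devices/<domain:bus:dev.fn>/pcie_replay_count
 *
 * The PCI address placeholder is hypothetical; substitute the address of
 * the GPU in question.
 */
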
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);

        return sysfs_emit(buf, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
                amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);

        return sysfs_emit(buf, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
                amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);

        return sysfs_emit(buf, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
                amdgpu_device_get_serial_number, NULL);

/**
 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ATPX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_px(struct drm_device *dev)
{
        struct amdgpu_device *adev = drm_to_adev(dev);

        if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
                return true;
        return false;
}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
        struct amdgpu_device *adev = drm_to_adev(dev);

        if (adev->has_pr3 ||
            ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
                return true;
        return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
        struct amdgpu_device *adev = drm_to_adev(dev);

        return amdgpu_asic_supports_baco(adev);
}

/**
 * amdgpu_device_supports_smart_shift - Is the device dGPU with
 * smart shift support
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with Smart Shift support,
 * otherwise returns false.
 */
bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
{
        return (amdgpu_device_supports_boco(dev) &&
                amdgpu_acpi_is_power_shift_control_supported());
}

/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, the size of @buf must be >= @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
                             void *buf, size_t size, bool write)
{
        unsigned long flags;
        uint32_t hi = ~0, tmp = 0;
        uint32_t *data = buf;
        uint64_t last;
        int idx;

        if (!drm_dev_enter(adev_to_drm(adev), &idx))
                return;

        BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

        spin_lock_irqsave(&adev->mmio_idx_lock, flags);
        for (last = pos + size; pos < last; pos += 4) {
                tmp = pos >> 31;

                WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
                if (tmp != hi) {
                        WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
                        hi = tmp;
                }
                if (write)
                        WREG32_NO_KIQ(mmMM_DATA, *data++);
                else
                        *data++ = RREG32_NO_KIQ(mmMM_DATA);
        }

        spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
        drm_dev_exit(idx);
}

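/*
 * Worked example (illustrative, derived from the loop above): reading the
 * dword at VRAM offset 0x100000004 takes two index writes and one data
 * read. Bits [30:0] of the byte offset go into MM_INDEX, 0x80000000 is
 * OR'ed in as the aperture-select bit used above, and the remaining high
 * bits (pos >> 31 == 0x2) go into MM_INDEX_HI:
 *
 *   WREG32_NO_KIQ(mmMM_INDEX, 0x00000004 | 0x80000000);
 *   WREG32_NO_KIQ(mmMM_INDEX_HI, 0x2);
 *   val = RREG32_NO_KIQ(mmMM_DATA);
 *
 * The loop only rewrites MM_INDEX_HI when those high bits change between
 * iterations.
 */
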
/**
 * amdgpu_device_aper_access - access vram by vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, the size of @buf must be >= @size
 * @write: true - write to vram, otherwise - read from vram
 *
 * The return value means how many bytes have been transferred.
 */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
                                 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
        void __iomem *addr;
        size_t count = 0;
        uint64_t last;

        if (!adev->mman.aper_base_kaddr)
                return 0;

        last = min(pos + size, adev->gmc.visible_vram_size);
        if (last > pos) {
                addr = adev->mman.aper_base_kaddr + pos;
                count = last - pos;

                if (write) {
                        memcpy_toio(addr, buf, count);
                        mb();
                        amdgpu_device_flush_hdp(adev, NULL);
                } else {
                        amdgpu_device_invalidate_hdp(adev, NULL);
                        mb();
                        memcpy_fromio(buf, addr, count);
                }

        }

        return count;
#else
        return 0;
#endif
}

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, the size of @buf must be >= @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
                               void *buf, size_t size, bool write)
{
        size_t count;

        /* try using the VRAM aperture to access VRAM first */
        count = amdgpu_device_aper_access(adev, pos, buf, size, write);
        size -= count;
        if (size) {
                /* use MM_INDEX/MM_DATA to access the rest of VRAM */
                pos += count;
                buf += count;
                amdgpu_device_mm_access(adev, pos, buf, size, write);
        }
}

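/*
 * Usage sketch (illustrative): copying a few dwords out of VRAM. The
 * helper prefers the CPU-visible aperture and silently falls back to
 * MM_INDEX/MM_DATA for any part of the range the aperture cannot reach.
 * vram_offset is a caller-provided, dword-aligned VRAM offset:
 *
 *   u32 data[4];
 *
 *   amdgpu_device_vram_access(adev, vram_offset, data, sizeof(data), false);
 *
 * Both the offset and the size must be dword aligned, as enforced by the
 * BUG_ON() in amdgpu_device_mm_access().
 */
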
/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
        if (adev->no_hw_access)
                return true;

#ifdef CONFIG_LOCKDEP
        /*
         * This is a bit complicated to understand, so worth a comment. What we assert
         * here is that the GPU reset is not running on another thread in parallel.
         *
         * For this we trylock the read side of the reset semaphore, if that succeeds
         * we know that the reset is not running in parallel.
         *
         * If the trylock fails we assert that we are either already holding the read
         * side of the lock or are the reset thread itself and hold the write side of
         * the lock.
         */
        if (in_task()) {
                if (down_read_trylock(&adev->reset_domain->sem))
                        up_read(&adev->reset_domain->sem);
                else
                        lockdep_assert_held(&adev->reset_domain->sem);
        }
#endif
        return false;
}

/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
                            uint32_t reg, uint32_t acc_flags)
{
        uint32_t ret;

        if (amdgpu_device_skip_hw_access(adev))
                return 0;

        if ((reg * 4) < adev->rmmio_size) {
                if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
                    amdgpu_sriov_runtime(adev) &&
                    down_read_trylock(&adev->reset_domain->sem)) {
                        ret = amdgpu_kiq_rreg(adev, reg);
                        up_read(&adev->reset_domain->sem);
                } else {
                        ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
                }
        } else {
                ret = adev->pcie_rreg(adev, reg * 4);
        }

        trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

        return ret;
}

/*
 * MMIO register read with bytes helper function
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
        if (amdgpu_device_skip_hw_access(adev))
                return 0;

        if (offset < adev->rmmio_size)
                return (readb(adev->rmmio + offset));
        BUG();
}

/*
 * MMIO register write with bytes helper function
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
        if (amdgpu_device_skip_hw_access(adev))
                return;

        if (offset < adev->rmmio_size)
                writeb(value, adev->rmmio + offset);
        else
                BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
                        uint32_t reg, uint32_t v,
                        uint32_t acc_flags)
{
        if (amdgpu_device_skip_hw_access(adev))
                return;

        if ((reg * 4) < adev->rmmio_size) {
                if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
                    amdgpu_sriov_runtime(adev) &&
                    down_read_trylock(&adev->reset_domain->sem)) {
                        amdgpu_kiq_wreg(adev, reg, v);
                        up_read(&adev->reset_domain->sem);
                } else {
                        writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
                }
        } else {
                adev->pcie_wreg(adev, reg * 4, v);
        }

        trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

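/*
 * Note (illustrative): most driver code does not call amdgpu_device_rreg()
 * and amdgpu_device_wreg() directly but goes through the RREG32()/WREG32()
 * macro family from amdgpu.h, which expands to these helpers. A typical
 * read-modify-write looks like:
 *
 *   tmp = RREG32(reg_offset);
 *   tmp |= some_mask;
 *   WREG32(reg_offset, tmp);
 *
 * reg_offset and some_mask are placeholders, not real register definitions.
 */
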
/**
 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio/rlc register
 * @v: value to write
 *
 * this function is invoked only for the debugfs register access
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
                             uint32_t reg, uint32_t v)
{
        if (amdgpu_device_skip_hw_access(adev))
                return;

        if (amdgpu_sriov_fullaccess(adev) &&
            adev->gfx.rlc.funcs &&
            adev->gfx.rlc.funcs->is_rlcg_access_range) {
                if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
                        return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
        } else if ((reg * 4) >= adev->rmmio_size) {
                adev->pcie_wreg(adev, reg * 4, v);
        } else {
                writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
        }
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
        if (amdgpu_device_skip_hw_access(adev))
                return 0;

        if (index < adev->doorbell.num_kernel_doorbells) {
                return readl(adev->doorbell.ptr + index);
        } else {
                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
                return 0;
        }
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
        if (amdgpu_device_skip_hw_access(adev))
                return;

        if (index < adev->doorbell.num_kernel_doorbells) {
                writel(v, adev->doorbell.ptr + index);
        } else {
                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
        }
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
        if (amdgpu_device_skip_hw_access(adev))
                return 0;

        if (index < adev->doorbell.num_kernel_doorbells) {
                return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
        } else {
                DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
                return 0;
        }
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
        if (amdgpu_device_skip_hw_access(adev))
                return;

        if (index < adev->doorbell.num_kernel_doorbells) {
                atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
        } else {
                DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
        }
}

/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
                                u32 reg_addr)
{
        unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;
        u32 r;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        r = readl(pcie_data_offset);
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

        return r;
}

u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
                                    u64 reg_addr)
{
        unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
        u32 r;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_index_hi_offset;
        void __iomem *pcie_data_offset;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
        if (adev->nbio.funcs->get_pcie_index_hi_offset)
                pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
        else
                pcie_index_hi = 0;

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
        if (pcie_index_hi != 0)
                pcie_index_hi_offset = (void __iomem *)adev->rmmio +
                                pcie_index_hi * 4;

        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        if (pcie_index_hi != 0) {
                writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }
        r = readl(pcie_data_offset);

        /* clear the high bits */
        if (pcie_index_hi != 0) {
                writel(0, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }

        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

        return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64 bits indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
                                  u32 reg_addr)
{
        unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;
        u64 r;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

        /* read low 32 bits */
        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        r = readl(pcie_data_offset);
        /* read high 32 bits */
        writel(reg_addr + 4, pcie_index_offset);
        readl(pcie_index_offset);
        r |= ((u64)readl(pcie_data_offset) << 32);
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

        return r;
}

/**
 * amdgpu_device_indirect_wreg - write an indirect register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
                                 u32 reg_addr, u32 reg_data)
{
        unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        writel(reg_data, pcie_data_offset);
        readl(pcie_data_offset);
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
                                     u64 reg_addr, u32 reg_data)
{
        unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_index_hi_offset;
        void __iomem *pcie_data_offset;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
        if (adev->nbio.funcs->get_pcie_index_hi_offset)
                pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
        else
                pcie_index_hi = 0;

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
        if (pcie_index_hi != 0)
                pcie_index_hi_offset = (void __iomem *)adev->rmmio +
                                pcie_index_hi * 4;

        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        if (pcie_index_hi != 0) {
                writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }
        writel(reg_data, pcie_data_offset);
        readl(pcie_data_offset);

        /* clear the high bits */
        if (pcie_index_hi != 0) {
                writel(0, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }

        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64 bits indirect register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
                                   u32 reg_addr, u64 reg_data)
{
        unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

        /* write low 32 bits */
        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
        readl(pcie_data_offset);
        /* write high 32 bits */
        writel(reg_addr + 4, pcie_index_offset);
        readl(pcie_index_offset);
        writel((u32)(reg_data >> 32), pcie_data_offset);
        readl(pcie_data_offset);
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

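/*
 * Worked example (illustrative): the 64 bit write above is just two 32 bit
 * index/data transactions. Writing 0x1122334455667788 to indirect register
 * 0x38 effectively performs:
 *
 *   writel(0x38, pcie_index_offset);         low dword select
 *   writel(0x55667788, pcie_data_offset);    low dword data
 *   writel(0x3c, pcie_index_offset);         high dword select
 *   writel(0x11223344, pcie_data_offset);    high dword data
 *
 * with a readl() after each write, as in the function, to flush the posted
 * write before the next transaction.
 */
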
/**
 * amdgpu_device_get_rev_id - query device rev_id
 *
 * @adev: amdgpu_device pointer
 *
 * Return device rev_id
 */
u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
{
        return adev->nbio.funcs->get_rev_id(adev);
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
        DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
        BUG();
        return 0;
}

static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
{
        DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
        BUG();
        return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
        DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
                  reg, v);
        BUG();
}

static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
{
        DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
                  reg, v);
        BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
        DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
        BUG();
        return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
        DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
                  reg, v);
        BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
                                          uint32_t block, uint32_t reg)
{
        DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
                  reg, block);
        BUG();
        return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
                                      uint32_t block,
                                      uint32_t reg, uint32_t v)
{
        DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
                  reg, block, v);
        BUG();
}

/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then calls atom asic init.
 */
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
        amdgpu_asic_pre_asic_init(adev);

        if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
            adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
                return amdgpu_atomfirmware_asic_init(adev, true);
        else
                return amdgpu_atom_asic_init(adev->mode_info.atom_context);
}

/**
 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
{
        return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
                                       AMDGPU_GEM_DOMAIN_VRAM |
                                       AMDGPU_GEM_DOMAIN_GTT,
                                       &adev->mem_scratch.robj,
                                       &adev->mem_scratch.gpu_addr,
                                       (void **)&adev->mem_scratch.ptr);
}

/**
 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
                                             const u32 *registers,
                                             const u32 array_size)
{
        u32 tmp, reg, and_mask, or_mask;
        int i;

        if (array_size % 3)
                return;

        for (i = 0; i < array_size; i += 3) {
                reg = registers[i + 0];
                and_mask = registers[i + 1];
                or_mask = registers[i + 2];

                if (and_mask == 0xffffffff) {
                        tmp = or_mask;
                } else {
                        tmp = RREG32(reg);
                        tmp &= ~and_mask;
                        if (adev->family >= AMDGPU_FAMILY_AI)
                                tmp |= (or_mask & and_mask);
                        else
                                tmp |= or_mask;
                }
                WREG32(reg, tmp);
        }
}

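/*
 * Example (illustrative): a golden-register table is a flat array of
 * {offset, and_mask, or_mask} triplets. The hypothetical table below would
 * clear the bits in 0xff and then set 0x04 in one register, and overwrite
 * a second register completely (and_mask == 0xffffffff means "write
 * or_mask as is"). Note the AI+ family applies (or_mask & and_mask), as
 * handled above:
 *
 *   static const u32 fake_golden_settings[] = {
 *           mmSOME_REGISTER,  0x000000ff, 0x00000004,
 *           mmOTHER_REGISTER, 0xffffffff, 0x12345678,
 *   };
 *
 *   amdgpu_device_program_register_sequence(adev, fake_golden_settings,
 *                                           ARRAY_SIZE(fake_golden_settings));
 *
 * mmSOME_REGISTER and mmOTHER_REGISTER are placeholders, not real offsets.
 */
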
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
        pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
        return pci_reset_function(adev->pdev);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

        /* No doorbell on SI hardware generation */
        if (adev->asic_type < CHIP_BONAIRE) {
                adev->doorbell.base = 0;
                adev->doorbell.size = 0;
                adev->doorbell.num_kernel_doorbells = 0;
                adev->doorbell.ptr = NULL;
                return 0;
        }

        if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
                return -EINVAL;

        amdgpu_asic_init_doorbell_index(adev);

        /* doorbell bar mapping */
        adev->doorbell.base = pci_resource_start(adev->pdev, 2);
        adev->doorbell.size = pci_resource_len(adev->pdev, 2);

        if (adev->enable_mes) {
                adev->doorbell.num_kernel_doorbells =
                        adev->doorbell.size / sizeof(u32);
        } else {
                adev->doorbell.num_kernel_doorbells =
                        min_t(u32, adev->doorbell.size / sizeof(u32),
                              adev->doorbell_index.max_assignment + 1);
                if (adev->doorbell.num_kernel_doorbells == 0)
                        return -EINVAL;

                /* For Vega, reserve and map two pages on the doorbell BAR since
                 * the SDMA paging queue doorbell uses the second page. The
                 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
                 * doorbells are in the first page. So with paging queue enabled,
                 * num_kernel_doorbells grows by one page (0x400 in dwords).
                 */
                if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(4, 0, 0) &&
                    adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(4, 2, 0))
                        adev->doorbell.num_kernel_doorbells += 0x400;
        }

        adev->doorbell.ptr = ioremap(adev->doorbell.base,
                                     adev->doorbell.num_kernel_doorbells *
                                     sizeof(u32));
        if (adev->doorbell.ptr == NULL)
                return -ENOMEM;

        return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
        iounmap(adev->doorbell.ptr);
        adev->doorbell.ptr = NULL;
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
        if (adev->wb.wb_obj) {
                amdgpu_bo_free_kernel(&adev->wb.wb_obj,
                                      &adev->wb.gpu_addr,
                                      (void **)&adev->wb.wb);
                adev->wb.wb_obj = NULL;
        }
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an error code on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
        int r;

        if (adev->wb.wb_obj == NULL) {
                /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
                r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
                                            PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
                                            &adev->wb.wb_obj, &adev->wb.gpu_addr,
                                            (void **)&adev->wb.wb);
                if (r) {
                        dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
                        return r;
                }

                adev->wb.num_wb = AMDGPU_MAX_WB;
                memset(&adev->wb.used, 0, sizeof(adev->wb.used));

                /* clear wb memory */
                memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
        }

        return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
        unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

        if (offset < adev->wb.num_wb) {
                __set_bit(offset, adev->wb.used);
                *wb = offset << 3; /* convert to dw offset */
                return 0;
        } else {
                return -EINVAL;
        }
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
        wb >>= 3;
        if (wb < adev->wb.num_wb)
                __clear_bit(wb, adev->wb.used);
}

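/*
 * Usage sketch (illustrative): rings allocate a writeback slot for fences
 * or read pointers and release it on teardown. The returned index is in
 * dwords, matching the << 3 / >> 3 conversions above:
 *
 *   u32 wb;
 *
 *   if (amdgpu_device_wb_get(adev, &wb))
 *           return -EINVAL;
 *   gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *   cpu_addr = &adev->wb.wb[wb];
 *   ...
 *   amdgpu_device_wb_free(adev, wb);
 *
 * gpu_addr and cpu_addr are placeholder names for how callers typically
 * consume the slot.
 */
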
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
        int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
        struct pci_bus *root;
        struct resource *res;
        unsigned i;
        u16 cmd;
        int r;

        /* Bypass for VF */
        if (amdgpu_sriov_vf(adev))
                return 0;

        /* skip if the bios has already enabled large BAR */
        if (adev->gmc.real_vram_size &&
            (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
                return 0;

        /* Check if the root BUS has 64bit memory resources */
        root = adev->pdev->bus;
        while (root->parent)
                root = root->parent;

        pci_bus_for_each_resource(root, res, i) {
                if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
                    res->start > 0x100000000ull)
                        break;
        }

        /* Trying to resize is pointless without a root hub window above 4GB */
        if (!res)
                return 0;

        /* Limit the BAR size to what is available */
        rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
                        rbar_size);

        /* Disable memory decoding while we change the BAR addresses and size */
        pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
        pci_write_config_word(adev->pdev, PCI_COMMAND,
                              cmd & ~PCI_COMMAND_MEMORY);

        /* Free the VRAM and doorbell BAR, we most likely need to move both. */
        amdgpu_device_doorbell_fini(adev);
        if (adev->asic_type >= CHIP_BONAIRE)
                pci_release_resource(adev->pdev, 2);

        pci_release_resource(adev->pdev, 0);

        r = pci_resize_resource(adev->pdev, 0, rbar_size);
        if (r == -ENOSPC)
                DRM_INFO("Not enough PCI address space for a large BAR.");
        else if (r && r != -ENOTSUPP)
                DRM_ERROR("Problem resizing BAR0 (%d).", r);

        pci_assign_unassigned_bus_resources(adev->pdev->bus);

        /* When the doorbell or fb BAR isn't available we have no chance of
         * using the device.
         */
        r = amdgpu_device_doorbell_init(adev);
        if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
                return -ENODEV;

        pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

        return 0;
}

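/*
 * Worked example (illustrative): pci_rebar_bytes_to_size() encodes a BAR
 * size as log2(bytes) - 20 per the resizable BAR spec, so an 8 GB
 * framebuffer requests rbar_size = 33 - 20 = 13. If the platform can only
 * route a 256 MB window (size 8), the min() against
 * pci_rebar_get_possible_sizes() above clamps the request before
 * pci_resize_resource() is attempted.
 */
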
/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if post is needed, false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
        uint32_t reg;

        if (amdgpu_sriov_vf(adev))
                return false;

        if (amdgpu_passthrough(adev)) {
                /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
                 * some old smc fw still needs the driver to do vPost, otherwise the gpu
                 * hangs. smc fw versions above 22.15 don't have this flaw, so we force
                 * vPost to be executed for smc versions below 22.15
                 */
                if (adev->asic_type == CHIP_FIJI) {
                        int err;
                        uint32_t fw_ver;

                        err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
                        /* force vPost if an error occurred */
                        if (err)
                                return true;

                        fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
                        if (fw_ver < 0x00160e00)
                                return true;
                }
        }

        /* Don't post if we need to reset whole hive on init */
        if (adev->gmc.xgmi.pending_reset)
                return false;

        if (adev->has_hw_reset) {
                adev->has_hw_reset = false;
                return true;
        }

        /* bios scratch used on CIK+ */
        if (adev->asic_type >= CHIP_BONAIRE)
                return amdgpu_atombios_scratch_need_asic_init(adev);

        /* check MEM_SIZE for older asics */
        reg = amdgpu_asic_get_config_memsize(adev);

        if ((reg != 0) && (reg != 0xffffffff))
                return false;

        return true;
}

/**
 * amdgpu_device_should_use_aspm - check if the device should program ASPM
 *
 * @adev: amdgpu_device pointer
 *
 * Confirm whether the module parameter and pcie bridge agree that ASPM should
 * be set for this device.
 *
 * Returns true if it should be used or false if not.
 */
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
{
        switch (amdgpu_aspm) {
        case -1:
                break;
        case 0:
                return false;
        case 1:
                return true;
        default:
                return false;
        }
        return pcie_aspm_enabled(adev->pdev);
}

bool amdgpu_device_aspm_support_quirk(void)
{
#if IS_ENABLED(CONFIG_X86)
        struct cpuinfo_x86 *c = &cpu_data(0);

        return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
#else
        return true;
#endif
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @pdev: PCI device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
                                                 bool state)
{
        struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));

        amdgpu_asic_set_vga_state(adev, state);
        if (state)
                return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
                       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
        else
                return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines the number of bits in the page table versus the
 * page directory; a page is 4KB so we have 12 bits of offset, a minimum of
 * 9 bits in the page table, and the remaining bits in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
        /* defines number of bits in page table versus page directory,
         * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
         * page table and the remaining bits are in the page directory */
        if (amdgpu_vm_block_size == -1)
                return;

        if (amdgpu_vm_block_size < 9) {
                dev_warn(adev->dev, "VM page table size (%d) too small\n",
                         amdgpu_vm_block_size);
                amdgpu_vm_block_size = -1;
        }
}

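/*
 * Worked example (illustrative): with the minimum 9 bit block size, one
 * page table covers 2^9 pages * 4 KB = 2 MB of address space per page
 * directory entry. A block size of 10 would double that to 4 MB per PDE
 * at the cost of larger page tables. Anything below 9 is reset to the
 * default above, since a page table is expected to map at least 512
 * entries.
 */
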
/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
        /* no need to check the default value */
        if (amdgpu_vm_size == -1)
                return;

        if (amdgpu_vm_size < 1) {
                dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
                         amdgpu_vm_size);
                amdgpu_vm_size = -1;
        }
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
        struct sysinfo si;
        bool is_os_64 = (sizeof(void *) == 8);
        uint64_t total_memory;
        uint64_t dram_size_seven_GB = 0x1B8000000;
        uint64_t dram_size_three_GB = 0xB8000000;

        if (amdgpu_smu_memory_pool_size == 0)
                return;

        if (!is_os_64) {
                DRM_WARN("Not 64-bit OS, feature not supported\n");
                goto def_value;
        }
        si_meminfo(&si);
        total_memory = (uint64_t)si.totalram * si.mem_unit;

        if ((amdgpu_smu_memory_pool_size == 1) ||
            (amdgpu_smu_memory_pool_size == 2)) {
                if (total_memory < dram_size_three_GB)
                        goto def_value1;
        } else if ((amdgpu_smu_memory_pool_size == 4) ||
                   (amdgpu_smu_memory_pool_size == 8)) {
                if (total_memory < dram_size_seven_GB)
                        goto def_value1;
        } else {
                DRM_WARN("Smu memory pool size not supported\n");
                goto def_value;
        }
        adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

        return;

def_value1:
        DRM_WARN("Not enough system memory\n");
def_value:
        adev->pm.smu_prv_buffer_size = 0;
}

static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
{
        if (!(adev->flags & AMD_IS_APU) ||
            adev->asic_type < CHIP_RAVEN)
                return 0;

        switch (adev->asic_type) {
        case CHIP_RAVEN:
                if (adev->pdev->device == 0x15dd)
                        adev->apu_flags |= AMD_APU_IS_RAVEN;
                if (adev->pdev->device == 0x15d8)
                        adev->apu_flags |= AMD_APU_IS_PICASSO;
                break;
        case CHIP_RENOIR:
                if ((adev->pdev->device == 0x1636) ||
                    (adev->pdev->device == 0x164c))
                        adev->apu_flags |= AMD_APU_IS_RENOIR;
                else
                        adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
                break;
        case CHIP_VANGOGH:
                adev->apu_flags |= AMD_APU_IS_VANGOGH;
                break;
        case CHIP_YELLOW_CARP:
                break;
        case CHIP_CYAN_SKILLFISH:
                if ((adev->pdev->device == 0x13FE) ||
                    (adev->pdev->device == 0x143F))
                        adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
                break;
        default:
                break;
        }

        return 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
        if (amdgpu_sched_jobs < 4) {
                dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
                         amdgpu_sched_jobs);
                amdgpu_sched_jobs = 4;
        } else if (!is_power_of_2(amdgpu_sched_jobs)) {
                dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
                         amdgpu_sched_jobs);
                amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
        }

        if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
                /* gart size must be greater or equal to 32M */
                dev_warn(adev->dev, "gart size (%d) too small\n",
                         amdgpu_gart_size);
                amdgpu_gart_size = -1;
        }

        if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
                /* gtt size must be greater or equal to 32M */
                dev_warn(adev->dev, "gtt size (%d) too small\n",
                         amdgpu_gtt_size);
                amdgpu_gtt_size = -1;
        }

        /* valid range is between 4 and 9 inclusive */
        if (amdgpu_vm_fragment_size != -1 &&
            (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
                dev_warn(adev->dev, "valid range is between 4 and 9\n");
                amdgpu_vm_fragment_size = -1;
        }

        if (amdgpu_sched_hw_submission < 2) {
                dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
                         amdgpu_sched_hw_submission);
                amdgpu_sched_hw_submission = 2;
        } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
                dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
                         amdgpu_sched_hw_submission);
                amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
        }

        if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
                dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
                amdgpu_reset_method = -1;
        }

        amdgpu_device_check_smu_prv_buffer_size(adev);

        amdgpu_device_check_vm_size(adev);

        amdgpu_device_check_block_size(adev);

        adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

        return 0;
}

1701/**
1702 * amdgpu_switcheroo_set_state - set switcheroo state
1703 *
1704 * @pdev: pci dev pointer
1694467b 1705 * @state: vga_switcheroo state
d38ceaf9 1706 *
12024b17 1707 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1708 * the asics before or after it is powered up using ACPI methods.
1709 */
8aba21b7
LT
1710static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1711 enum vga_switcheroo_state state)
d38ceaf9
AD
1712{
1713 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1714 int r;
d38ceaf9 1715
b98c6299 1716 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1717 return;
1718
1719 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1720 pr_info("switched on\n");
d38ceaf9
AD
1721 /* don't suspend or resume card normally */
1722 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1723
8f66090b
TZ
1724 pci_set_power_state(pdev, PCI_D0);
1725 amdgpu_device_load_pci_state(pdev);
1726 r = pci_enable_device(pdev);
de185019
AD
1727 if (r)
1728 DRM_WARN("pci_enable_device failed (%d)\n", r);
1729 amdgpu_device_resume(dev, true);
d38ceaf9 1730
d38ceaf9 1731 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1732 } else {
dd4fa6c1 1733 pr_info("switched off\n");
d38ceaf9 1734 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1735 amdgpu_device_suspend(dev, true);
8f66090b 1736 amdgpu_device_cache_pci_state(pdev);
de185019 1737 /* Shut down the device */
8f66090b
TZ
1738 pci_disable_device(pdev);
1739 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1740 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1741 }
1742}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Checks if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};
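
/*
 * Illustrative sketch (hypothetical, not part of this file): these ops
 * are handed to the switcheroo core during device init, roughly as
 * below. The px argument tells vga_switcheroo whether the driver does
 * its own runtime power control.
 */
static int __maybe_unused example_register_switcheroo(struct amdgpu_device *adev,
						      bool px)
{
	/* set_gpu_state/can_switch above are then invoked by the core */
	return vga_switcheroo_register_client(adev->pdev,
					      &amdgpu_switcheroo_ops, px);
}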

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}
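
/*
 * Illustrative usage sketch: example_gate_gfx_cg() is a hypothetical
 * caller showing the calling convention of the helper above.
 */
static void __maybe_unused example_gate_gfx_cg(struct amdgpu_device *adev)
{
	int r = amdgpu_device_ip_set_clockgating_state(adev,
						       AMD_IP_BLOCK_TYPE_GFX,
						       AMD_CG_STATE_GATE);

	/* the helper returns the error of the last failing instance */
	if (r)
		dev_warn(adev->dev, "GFX clockgating failed (%d)\n", r);
}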

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u64 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;
}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;
}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}
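
/*
 * Illustrative usage sketch (hypothetical caller): fetch a block once
 * and read its version rather than re-walking the IP list by hand.
 */
static u32 __maybe_unused example_gfx_major(struct amdgpu_device *adev)
{
	struct amdgpu_ip_block *ip =
		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);

	return ip ? ip->version->major : 0;	/* 0 if the block is absent */
}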

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * Returns 0 if the IP block's version is equal to or greater than
 * @major.@minor, or 1 if it is smaller or the ip_block doesn't exist.
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}
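
/*
 * The 0/1 convention reads inverted compared to strcmp()-style
 * comparisons, so a sketch of the intended use (hypothetical caller):
 */
static bool __maybe_unused example_has_gfx_8_1(struct amdgpu_device *adev)
{
	/* true when the GFX IP block exists and is at least v8.1 */
	return amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
						  8, 1) == 0;
}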

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	switch (ip_block_version->type) {
	case AMD_IP_BLOCK_TYPE_VCN:
		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
			return 0;
		break;
	case AMD_IP_BLOCK_TYPE_JPEG:
		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
			return 0;
		break;
	default:
		break;
	}

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		const char *pci_address_name = pci_name(adev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}
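
/*
 * Reading the strsep() calls above back out, the accepted parameter
 * grammar appears to be: entries separated by ';', each a PCI address
 * (or "all") optionally followed by ",<num_crtc>". The examples below
 * are an inference from that parsing, not quoted documentation:
 *
 *   amdgpu.virtual_display=all              every device, 1 crtc
 *   amdgpu.virtual_display=0000:01:00.0,2   one device, 2 crtcs
 *   amdgpu.virtual_display=0000:01:00.0,2;0000:02:00.0
 *                                           two devices, 2 and 1 crtcs
 */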

void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
		adev->mode_info.num_crtc = 1;
		adev->enable_virtual_display = true;
		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
			 adev->enable_virtual_display, adev->mode_info.num_crtc);
	}
}

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[40];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	if (adev->mman.discovery_bin) {
		/*
		 * FIXME: The bounding box is still needed by Navi12, so
		 * temporarily read it from gpu_info firmware. Should be dropped
		 * when DAL no longer needs it.
		 */
		if (adev->asic_type != CHIP_NAVI12)
			return 0;
	}

	switch (adev->asic_type) {
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			chip_name = "raven2";
		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	case CHIP_NAVI12:
		chip_name = "navi12";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
	if (err) {
		dev_err(adev->dev,
			"Failed to get gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		/*
		 * Should be dropped when DAL no longer needs it.
		 */
		if (adev->asic_type == CHIP_NAVI12)
			goto parse_soc_bounding_box;

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		if (hdr->version_minor >= 1) {
			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->gfx.config.num_sc_per_sh =
				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
			adev->gfx.config.num_packer_per_sc =
				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
		}

parse_soc_bounding_box:
		/*
		 * soc bounding box info is not integrated in the discovery table,
		 * we always need to parse it from gpu info firmware if needed.
		 */
		if (hdr->version_minor == 2) {
			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
		}
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}
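
/*
 * A condensed sketch of the blob layout the parser above walks, using
 * only fields already referenced in this function: the versioned
 * payload begins ucode_array_offset_bytes from the start of the file,
 * with all multi-byte fields little-endian. The helper name is
 * hypothetical, for illustration only.
 */
static const void * __maybe_unused example_gpu_info_payload(const struct firmware *fw)
{
	const struct gpu_info_firmware_header_v1_0 *hdr =
		(const struct gpu_info_firmware_header_v1_0 *)fw->data;

	return fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes);
}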

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	struct drm_device *dev = adev_to_drm(adev);
	struct pci_dev *parent;
	int i, r;
	bool total;

	amdgpu_device_enable_virtual_display(adev);

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return r;
	}

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if (adev->flags & AMD_IS_APU)
			adev->family = AMDGPU_FAMILY_KV;
		else
			adev->family = AMDGPU_FAMILY_CI;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->flags & AMD_IS_APU)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		r = amdgpu_discovery_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	}

	if (amdgpu_has_atpx() &&
	    (amdgpu_is_atpx_hybrid() ||
	     amdgpu_has_atpx_dgpu_power_cntl()) &&
	    ((adev->flags & AMD_IS_APU) == 0) &&
	    !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
		adev->flags |= AMD_IS_PX;

	if (!(adev->flags & AMD_IS_APU)) {
		parent = pci_upstream_bridge(adev->pdev);
		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
	}

	adev->pm.pp_feature = amdgpu_pp_feature_mask;
	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;

	total = true;
	for (i = 0; i < adev->num_ip_blocks; i++) {
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					total = false;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
		/* get the vbios after the asic_funcs are set up */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
			r = amdgpu_device_parse_gpu_info_fw(adev);
			if (r)
				return r;

			/* Read BIOS */
			if (!amdgpu_get_bios(adev))
				return -EINVAL;

			r = amdgpu_atombios_init(adev);
			if (r) {
				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
				return r;
			}

			/* get pf2vf msg info at its earliest time */
			if (amdgpu_sriov_vf(adev))
				amdgpu_virt_init_data_exchange(adev);
		}
	}
	if (!total)
		return -ENODEV;

	amdgpu_amdkfd_device_probe(adev);
	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
			if (r) {
				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
	int r = 0;
	int i;
	uint32_t smu_version;

	if (adev->asic_type >= CHIP_VEGA10) {
		for (i = 0; i < adev->num_ip_blocks; i++) {
			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
				continue;

			if (!adev->ip_blocks[i].status.sw)
				continue;

			/* no need to do the fw loading again if already done */
			if (adev->ip_blocks[i].status.hw == true)
				break;

			if (amdgpu_in_reset(adev) || adev->in_suspend) {
				r = adev->ip_blocks[i].version->funcs->resume(adev);
				if (r) {
					DRM_ERROR("resume of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				}
			} else {
				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
				if (r) {
					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				}
			}

			adev->ip_blocks[i].status.hw = true;
			break;
		}
	}

	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);

	return r;
}

static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
{
	long timeout;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		/* No need to setup the GPU scheduler for rings that don't need it */
		if (!ring || ring->no_scheduler)
			continue;

		switch (ring->funcs->type) {
		case AMDGPU_RING_TYPE_GFX:
			timeout = adev->gfx_timeout;
			break;
		case AMDGPU_RING_TYPE_COMPUTE:
			timeout = adev->compute_timeout;
			break;
		case AMDGPU_RING_TYPE_SDMA:
			timeout = adev->sdma_timeout;
			break;
		default:
			timeout = adev->video_timeout;
			break;
		}

		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
				   ring->num_hw_submission, 0,
				   timeout, adev->reset_domain->wq,
				   ring->sched_score, ring->name,
				   adev->dev);
		if (r) {
			DRM_ERROR("Failed to create scheduler on ring %s.\n",
				  ring->name);
			return r;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
 * are run. sw_init initializes the software state associated with each IP
 * and hw_init initializes the hardware associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
	int i, r;

	r = amdgpu_ras_init(adev);
	if (r)
		return r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
		if (r) {
			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			goto init_failed;
		}
		adev->ip_blocks[i].status.sw = true;

		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
			/* need to do common hw init early so everything is set up for gmc */
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				goto init_failed;
			}
			adev->ip_blocks[i].status.hw = true;
		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			/* need to do gmc hw init early so we can allocate gpu mem */
			/* Try to reserve bad pages early */
			if (amdgpu_sriov_vf(adev))
				amdgpu_virt_exchange_data(adev);

			r = amdgpu_device_mem_scratch_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
				goto init_failed;
			}
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				goto init_failed;
			}
			r = amdgpu_device_wb_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
				goto init_failed;
			}
			adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
			if (amdgpu_mcbp) {
				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
							       AMDGPU_GEM_DOMAIN_VRAM |
							       AMDGPU_GEM_DOMAIN_GTT,
							       AMDGPU_CSA_SIZE);
				if (r) {
					DRM_ERROR("allocate CSA failed %d\n", r);
					goto init_failed;
				}
			}
		}
	}

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_init_data_exchange(adev);

	r = amdgpu_ib_pool_init(adev);
	if (r) {
		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
		goto init_failed;
	}

	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
	if (r)
		goto init_failed;

	r = amdgpu_device_ip_hw_init_phase1(adev);
	if (r)
		goto init_failed;

	r = amdgpu_device_fw_loading(adev);
	if (r)
		goto init_failed;

	r = amdgpu_device_ip_hw_init_phase2(adev);
	if (r)
		goto init_failed;

	/*
	 * retired pages will be loaded from eeprom and reserved here,
	 * it should be called after amdgpu_device_ip_hw_init_phase2 since
	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
	 * functional for I2C communication, which is only true at this point.
	 *
	 * amdgpu_ras_recovery_init may fail, but the upper layer only cares
	 * about failures from a bad gpu situation and stops the amdgpu init
	 * process accordingly. For other failed cases, it will still release
	 * all the resources and print an error message, rather than return a
	 * negative value to the upper level.
	 *
	 * Note: theoretically, this should be called before all vram allocations
	 * to protect retired pages from being abused.
	 */
	r = amdgpu_ras_recovery_init(adev);
	if (r)
		goto init_failed;

	/*
	 * In case of XGMI grab extra reference for reset domain for this device
	 */
	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		if (amdgpu_xgmi_add_device(adev) == 0) {
			if (!amdgpu_sriov_vf(adev)) {
				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);

				if (WARN_ON(!hive)) {
					r = -ENOENT;
					goto init_failed;
				}

				if (!hive->reset_domain ||
				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
					r = -ENOENT;
					amdgpu_put_xgmi_hive(hive);
					goto init_failed;
				}

				/* Drop the early temporary reset domain we created for device */
				amdgpu_reset_put_reset_domain(adev->reset_domain);
				adev->reset_domain = hive->reset_domain;
				amdgpu_put_xgmi_hive(hive);
			}
		}
	}

	r = amdgpu_device_init_schedulers(adev);
	if (r)
		goto init_failed;

	/* Don't init kfd if the whole hive needs to be reset during init */
	if (!adev->gmc.xgmi.pending_reset)
		amdgpu_amdkfd_device_init(adev);

	amdgpu_fru_get_product_info(adev);

init_failed:

	return r;
}

/**
 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
 *
 * @adev: amdgpu_device pointer
 *
 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
 * this function before a GPU reset. If the value is retained after a
 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
 */
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_check_vram_lost - check if vram is valid
 *
 * @adev: amdgpu_device pointer
 *
 * Checks the reset magic value written to the gart pointer in VRAM.
 * The driver calls this after a GPU reset to see if the contents of
 * VRAM are lost or not.
 * Returns true if vram is lost, false if not.
 */
static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
{
	if (memcmp(adev->gart.ptr, adev->reset_magic,
		   AMDGPU_RESET_MAGIC_NUM))
		return true;

	if (!amdgpu_in_reset(adev))
		return false;

	/*
	 * For all ASICs with baco/mode1 reset, the VRAM is
	 * always assumed to be lost.
	 */
	switch (amdgpu_asic_reset_method(adev)) {
	case AMD_RESET_METHOD_BACO:
	case AMD_RESET_METHOD_MODE1:
		return true;
	default:
		return false;
	}
}
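
/*
 * The two helpers above form a write-before/compare-after pair around
 * a reset. A hypothetical sketch of the round trip, using a generic
 * amdgpu_asic_reset() call purely for illustration:
 */
static bool __maybe_unused example_reset_preserves_vram(struct amdgpu_device *adev)
{
	amdgpu_device_fill_reset_magic(adev);		/* before the reset */
	amdgpu_asic_reset(adev);			/* illustrative reset */
	return !amdgpu_device_check_vram_lost(adev);	/* after the reset */
}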

/**
 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 * @state: clockgating state (gate or ungate)
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_clockgating_state callbacks are run.
 * On late init, this pass enables clockgating for hardware IPs; on fini or
 * suspend, it disables clockgating.
 * Returns 0 on success, negative error code on failure.
 */
int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
			       enum amd_clockgating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		/* skip CG for GFX, SDMA on S0ix */
		if (adev->in_s0ix &&
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
			continue;
		/* skip CG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
			/* enable clockgating to save power */
			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
										     state);
			if (r) {
				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}

	return 0;
}

int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
			       enum amd_powergating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		/* skip PG for GFX, SDMA on S0ix */
		if (adev->in_s0ix &&
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
			continue;
		/* skip PG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
			/* enable powergating to save power */
			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
										     state);
			if (r) {
				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}
	return 0;
}

static int amdgpu_device_enable_mgpu_fan_boost(void)
{
	struct amdgpu_gpu_instance *gpu_ins;
	struct amdgpu_device *adev;
	int i, ret = 0;

	mutex_lock(&mgpu_info.mutex);

	/*
	 * MGPU fan boost feature should be enabled
	 * only when there are two or more dGPUs in
	 * the system
	 */
	if (mgpu_info.num_dgpu < 2)
		goto out;

	for (i = 0; i < mgpu_info.num_dgpu; i++) {
		gpu_ins = &(mgpu_info.gpu_ins[i]);
		adev = gpu_ins->adev;
		if (!(adev->flags & AMD_IS_APU) &&
		    !gpu_ins->mgpu_fan_enabled) {
			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
			if (ret)
				break;

			gpu_ins->mgpu_fan_enabled = 1;
		}
	}

out:
	mutex_unlock(&mgpu_info.mutex);

	return ret;
}

/**
 * amdgpu_device_ip_late_init - run late init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Late initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the late_init callbacks are run.
 * late_init covers any special initialization that an IP requires
 * after all of the IPs have been initialized or something that needs to happen
 * late in the init process.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
{
	struct amdgpu_gpu_instance *gpu_instance;
	int i = 0, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->funcs->late_init) {
			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
			if (r) {
				DRM_ERROR("late_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
		adev->ip_blocks[i].status.late_initialized = true;
	}

	r = amdgpu_ras_late_init(adev);
	if (r) {
		DRM_ERROR("amdgpu_ras_late_init failed %d", r);
		return r;
	}

	amdgpu_ras_set_error_query_ready(adev, true);

	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);

	amdgpu_device_fill_reset_magic(adev);

	r = amdgpu_device_enable_mgpu_fan_boost();
	if (r)
		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);

	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
	if (amdgpu_passthrough(adev) &&
	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
	     adev->asic_type == CHIP_ALDEBARAN))
		amdgpu_dpm_handle_passthrough_sbr(adev, true);

	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		mutex_lock(&mgpu_info.mutex);

		/*
		 * Reset device p-state to low as this was booted with high.
		 *
		 * This should be performed only after all devices from the same
		 * hive get initialized.
		 *
		 * However, it's unknown in advance how many devices are in the
		 * hive, as they are counted one by one during device
		 * initialization.
		 *
		 * So we wait for all XGMI interlinked devices to be initialized.
		 * This may bring some delays as those devices may come from
		 * different hives. But that should be OK.
		 */
		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
			for (i = 0; i < mgpu_info.num_gpu; i++) {
				gpu_instance = &(mgpu_info.gpu_ins[i]);
				if (gpu_instance->adev->flags & AMD_IS_APU)
					continue;

				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
							   AMDGPU_XGMI_PSTATE_MIN);
				if (r) {
					DRM_ERROR("pstate setting failed (%d).\n", r);
					break;
				}
			}
		}

		mutex_unlock(&mgpu_info.mutex);
	}

	return 0;
}

/**
 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
 *
 * @adev: amdgpu_device pointer
 *
 * For ASICs that need to disable the SMC first
 */
static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
{
	int i, r;

	if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
		return;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
			/* XXX handle errors */
			if (r) {
				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
			}
			adev->ip_blocks[i].status.hw = false;
			break;
		}
	}
}

static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].version->funcs->early_fini)
			continue;

		r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
		if (r) {
			DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}
	}

	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

	amdgpu_amdkfd_suspend(adev, false);

	/* Workaround for ASICs that need to disable the SMC first */
	amdgpu_device_smu_fini_early(adev);

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.hw)
			continue;

		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
		/* XXX handle errors */
		if (r) {
			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}

		adev->ip_blocks[i].status.hw = false;
	}

	if (amdgpu_sriov_vf(adev)) {
		if (amdgpu_virt_release_full_gpu(adev, false))
			DRM_ERROR("failed to release exclusive mode on fini\n");
	}

	return 0;
}

/**
 * amdgpu_device_ip_fini - run fini for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main teardown pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
 * are run. hw_fini tears down the hardware associated with each IP
 * and sw_fini tears down any software state associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
{
	int i, r;

	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
		amdgpu_virt_release_ras_err_handler_data(adev);

	if (adev->gmc.xgmi.num_physical_nodes > 1)
		amdgpu_xgmi_remove_device(adev);

	amdgpu_amdkfd_device_fini_sw(adev);

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.sw)
			continue;

		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			amdgpu_ucode_free_bo(adev);
			amdgpu_free_static_csa(&adev->virt.csa_obj);
			amdgpu_device_wb_fini(adev);
			amdgpu_device_mem_scratch_fini(adev);
			amdgpu_ib_pool_fini(adev);
		}

		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
		/* XXX handle errors */
		if (r) {
			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}
		adev->ip_blocks[i].status.sw = false;
		adev->ip_blocks[i].status.valid = false;
	}

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		if (adev->ip_blocks[i].version->funcs->late_fini)
			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
		adev->ip_blocks[i].status.late_initialized = false;
	}

	amdgpu_ras_fini(adev);

	return 0;
}

/**
 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
 *
 * @work: work_struct.
 */
static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, delayed_init_work.work);
	int r;

	r = amdgpu_ib_ring_tests(adev);
	if (r)
		DRM_ERROR("ib ring test failed (%d).\n", r);
}

static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);

	WARN_ON_ONCE(adev->gfx.gfx_off_state);
	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);

	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
		adev->gfx.gfx_off_state = true;
}

/**
 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
 *
 * @adev: amdgpu_device pointer
 *
 * Main suspend function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked, clockgating is disabled and the
 * suspend callbacks are run. suspend puts the hardware and software state
 * in each IP into a state suitable for suspend.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

	/*
	 * Per PMFW team's suggestion, driver needs to handle gfxoff
	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
	 * scenario. Add the missing df cstate disablement here.
	 */
	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
		dev_warn(adev->dev, "Failed to disallow df cstate");

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.valid)
			continue;

		/* displays are handled separately */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
			continue;

		/* XXX handle errors */
		r = adev->ip_blocks[i].version->funcs->suspend(adev);
		/* XXX handle errors */
		if (r) {
			DRM_ERROR("suspend of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}

		adev->ip_blocks[i].status.hw = false;
	}

	return 0;
}

/**
 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
 *
 * @adev: amdgpu_device pointer
 *
 * Main suspend function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked, clockgating is disabled and the
 * suspend callbacks are run. suspend puts the hardware and software state
 * in each IP into a state suitable for suspend.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
{
	int i, r;

	if (adev->in_s0ix)
		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		/* displays are handled in phase1 */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
			continue;
		/* PSP lost connection when err_event_athub occurs */
		if (amdgpu_ras_intr_triggered() &&
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
			adev->ip_blocks[i].status.hw = false;
			continue;
		}

		/* skip unnecessary suspend if we do not initialize them yet */
		if (adev->gmc.xgmi.pending_reset &&
		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
			adev->ip_blocks[i].status.hw = false;
			continue;
		}

		/* skip suspend of gfx/mes and psp for S0ix
		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
		 * like at runtime. PSP is also part of the always on hardware
		 * so no need to suspend it.
		 */
		if (adev->in_s0ix &&
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
			continue;

		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
		if (adev->in_s0ix &&
		    (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
			continue;

		/* During cold boot, swPSP provides the IMU and RLC FW binaries to TOS.
		 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
		 * from this location and RLC Autoload automatically also gets loaded
		 * from here based on PMFW -> PSP message during re-init sequence.
		 * Therefore, the psp suspend & resume should be skipped to avoid
		 * destroying the TMR and reloading FWs again for IMU enabled APU ASICs.
		 */
		if (amdgpu_in_reset(adev) &&
		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
			continue;

		/* XXX handle errors */
		r = adev->ip_blocks[i].version->funcs->suspend(adev);
		/* XXX handle errors */
		if (r) {
			DRM_ERROR("suspend of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}
		adev->ip_blocks[i].status.hw = false;
		/* handle putting the SMC in the appropriate state */
		if (!amdgpu_sriov_vf(adev)) {
			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
				if (r) {
					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
						  adev->mp1_state, r);
					return r;
				}
			}
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_suspend - run suspend for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main suspend function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked, clockgating is disabled and the
 * suspend callbacks are run. suspend puts the hardware and software state
 * in each IP into a state suitable for suspend.
 * Returns 0 on success, negative error code on failure.
 */
int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
{
	int r;

	if (amdgpu_sriov_vf(adev)) {
		amdgpu_virt_fini_data_exchange(adev);
		amdgpu_virt_request_full_gpu(adev, false);
	}

	r = amdgpu_device_ip_suspend_phase1(adev);
	if (r)
		return r;
	r = amdgpu_device_ip_suspend_phase2(adev);

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, false);

	return r;
}

static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
{
	int i, r;

	static enum amd_ip_block_type ip_order[] = {
		AMD_IP_BLOCK_TYPE_COMMON,
		AMD_IP_BLOCK_TYPE_GMC,
		AMD_IP_BLOCK_TYPE_PSP,
		AMD_IP_BLOCK_TYPE_IH,
	};

	for (i = 0; i < adev->num_ip_blocks; i++) {
		int j;
		struct amdgpu_ip_block *block;

		block = &adev->ip_blocks[i];
		block->status.hw = false;

		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {

			if (block->version->type != ip_order[j] ||
			    !block->status.valid)
				continue;

			r = block->version->funcs->hw_init(adev);
			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
			if (r)
				return r;
			block->status.hw = true;
		}
	}

	return 0;
}
3270}
3271
06ec9070 3272static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3273{
3274 int i, r;
3275
2cb681b6
ML
3276 static enum amd_ip_block_type ip_order[] = {
3277 AMD_IP_BLOCK_TYPE_SMC,
3278 AMD_IP_BLOCK_TYPE_DCE,
3279 AMD_IP_BLOCK_TYPE_GFX,
3280 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3281 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3282 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3283 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3284 AMD_IP_BLOCK_TYPE_VCN,
3285 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3286 };
a90ad3c2 3287
2cb681b6
ML
3288 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3289 int j;
3290 struct amdgpu_ip_block *block;
a90ad3c2 3291
2cb681b6
ML
3292 for (j = 0; j < adev->num_ip_blocks; j++) {
3293 block = &adev->ip_blocks[j];
3294
3295 if (block->version->type != ip_order[i] ||
482f0e53
ML
3296 !block->status.valid ||
3297 block->status.hw)
2cb681b6
ML
3298 continue;
3299
895bd048
JZ
3300 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3301 r = block->version->funcs->resume(adev);
3302 else
3303 r = block->version->funcs->hw_init(adev);
3304
0aaeefcc 3305 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3306 if (r)
3307 return r;
482f0e53 3308 block->status.hw = true;
a90ad3c2
ML
3309 }
3310 }
3311
3312 return 0;
3313}

/**
 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * First resume function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the resume callbacks are run for
 * COMMON, GMC, and IH. resume puts the hardware into a functional state
 * after a suspend and updates the software state as necessary. This
 * function is also used for restoring the GPU after a GPU reset.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {

			r = adev->ip_blocks[i].version->funcs->resume(adev);
			if (r) {
				DRM_ERROR("resume of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}
3350}
3351
e3ecdffa
AD
3352/**
3353 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3354 *
3355 * @adev: amdgpu_device pointer
3356 *
3357 * First resume function for hardware IPs. The list of all the hardware
3358 * IPs that make up the asic is walked and the resume callbacks are run for
3359 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3360 * functional state after a suspend and updates the software state as
3361 * necessary. This function is also used for restoring the GPU after a GPU
3362 * reset.
3363 * Returns 0 on success, negative error code on failure.
3364 */
06ec9070 3365static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3366{
3367 int i, r;
3368
3369 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3370 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3371 continue;
fcf0649f 3372 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3373 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3374 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3375 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3376 continue;
a1255107 3377 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3378 if (r) {
a1255107
AD
3379 DRM_ERROR("resume of IP block <%s> failed %d\n",
3380 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3381 return r;
2c1a2784 3382 }
482f0e53 3383 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3384 }
3385
3386 return 0;
3387}
3388
e3ecdffa
AD
3389/**
3390 * amdgpu_device_ip_resume - run resume for hardware IPs
3391 *
3392 * @adev: amdgpu_device pointer
3393 *
3394 * Main resume function for hardware IPs. The hardware IPs
 3395 * are split into two resume functions because they are
 3396 * also used in recovering from a GPU reset and some additional
 3397 * steps need to be taken between them. In this case (S3/S4) they are
3398 * run sequentially.
3399 * Returns 0 on success, negative error code on failure.
3400 */
06ec9070 3401static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3402{
3403 int r;
3404
f2206741
AL
3405 if (!adev->in_s0ix) {
3406 r = amdgpu_amdkfd_resume_iommu(adev);
3407 if (r)
3408 return r;
3409 }
9cec53c1 3410
06ec9070 3411 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3412 if (r)
3413 return r;
7a3e0bb2
RZ
3414
3415 r = amdgpu_device_fw_loading(adev);
3416 if (r)
3417 return r;
3418
06ec9070 3419 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3420
3421 return r;
3422}
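/*
 * Editor's sketch (hedged, not from the original source): S3/S4 resume
 * simply runs the helpers above back to back, while the GPU-reset path
 * reuses the same phases with extra steps (ASIC re-post, VRAM recovery)
 * interleaved between them. Conceptually:
 *
 *	r = amdgpu_device_ip_resume_phase1(adev);
 *	r = r ? r : amdgpu_device_fw_loading(adev);
 *	r = r ? r : amdgpu_device_ip_resume_phase2(adev);
 */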
3423
e3ecdffa
AD
3424/**
3425 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3426 *
3427 * @adev: amdgpu_device pointer
3428 *
3429 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3430 */
4e99a44e 3431static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3432{
6867e1b5
ML
3433 if (amdgpu_sriov_vf(adev)) {
3434 if (adev->is_atom_fw) {
58ff791a 3435 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3436 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3437 } else {
3438 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3439 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3440 }
3441
3442 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3443 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3444 }
048765ad
AR
3445}
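/*
 * Usage sketch (hedged): once amdgpu_device_detect_sriov_bios() has run,
 * other code can test the detected capability, e.g.:
 *
 *	if (adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)
 *		dev_info(adev->dev, "VBIOS advertises SR-IOV\n");
 */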
3446
e3ecdffa
AD
3447/**
3448 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3449 *
3450 * @asic_type: AMD asic type
3451 *
 3452 * Check if there is DC (new modesetting infrastructure) support for an asic.
3453 * returns true if DC has support, false if not.
3454 */
4562236b
HW
3455bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3456{
3457 switch (asic_type) {
0637d417
AD
3458#ifdef CONFIG_DRM_AMDGPU_SI
3459 case CHIP_HAINAN:
3460#endif
3461 case CHIP_TOPAZ:
3462 /* chips with no display hardware */
3463 return false;
4562236b 3464#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3465 case CHIP_TAHITI:
3466 case CHIP_PITCAIRN:
3467 case CHIP_VERDE:
3468 case CHIP_OLAND:
2d32ffd6
AD
3469 /*
3470 * We have systems in the wild with these ASICs that require
3471 * LVDS and VGA support which is not supported with DC.
3472 *
3473 * Fallback to the non-DC driver here by default so as not to
3474 * cause regressions.
3475 */
3476#if defined(CONFIG_DRM_AMD_DC_SI)
3477 return amdgpu_dc > 0;
3478#else
3479 return false;
64200c46 3480#endif
4562236b 3481 case CHIP_BONAIRE:
0d6fbccb 3482 case CHIP_KAVERI:
367e6687
AD
3483 case CHIP_KABINI:
3484 case CHIP_MULLINS:
d9fda248
HW
3485 /*
3486 * We have systems in the wild with these ASICs that require
b5a0168e 3487 * VGA support which is not supported with DC.
d9fda248
HW
3488 *
3489 * Fallback to the non-DC driver here by default so as not to
3490 * cause regressions.
3491 */
3492 return amdgpu_dc > 0;
f7f12b25 3493 default:
fd187853 3494 return amdgpu_dc != 0;
f7f12b25 3495#else
4562236b 3496 default:
93b09a9a 3497 if (amdgpu_dc > 0)
044a48f4 3498 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3499 "but isn't supported by ASIC, ignoring\n");
4562236b 3500 return false;
f7f12b25 3501#endif
4562236b
HW
3502 }
3503}
3504
3505/**
3506 * amdgpu_device_has_dc_support - check if dc is supported
3507 *
982a820b 3508 * @adev: amdgpu_device pointer
4562236b
HW
3509 *
3510 * Returns true for supported, false for not supported
3511 */
3512bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3513{
25263da3 3514 if (adev->enable_virtual_display ||
abaf210c 3515 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3516 return false;
3517
4562236b
HW
3518 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3519}
3520
d4535e2c
AG
3521static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3522{
3523 struct amdgpu_device *adev =
3524 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3525 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3526
c6a6e2db
AG
3527 /* It's a bug to not have a hive within this function */
3528 if (WARN_ON(!hive))
3529 return;
3530
3531 /*
3532 * Use task barrier to synchronize all xgmi reset works across the
3533 * hive. task_barrier_enter and task_barrier_exit will block
3534 * until all the threads running the xgmi reset works reach
3535 * those points. task_barrier_full will do both blocks.
3536 */
3537 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3538
3539 task_barrier_enter(&hive->tb);
4a580877 3540 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3541
3542 if (adev->asic_reset_res)
3543 goto fail;
3544
3545 task_barrier_exit(&hive->tb);
4a580877 3546 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3547
3548 if (adev->asic_reset_res)
3549 goto fail;
43c4d576 3550
5e67bba3 3551 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3552 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3553 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3554 } else {
3555
3556 task_barrier_full(&hive->tb);
3557 adev->asic_reset_res = amdgpu_asic_reset(adev);
3558 }
ce316fa5 3559
c6a6e2db 3560fail:
d4535e2c 3561 if (adev->asic_reset_res)
fed184e9 3562 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3563 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3564 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3565}
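/*
 * Background note (editor's sketch): with N devices in a hive, the task
 * barrier enforces lock-step sequencing across the xgmi reset workers:
 * task_barrier_enter() blocks until all N workers arrive, so every node
 * enters BACO before any node exits it; task_barrier_exit() synchronizes
 * the exit the same way, and task_barrier_full() does both around the
 * whole-ASIC amdgpu_asic_reset() path.
 */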
3566
71f98027
AD
3567static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3568{
3569 char *input = amdgpu_lockup_timeout;
3570 char *timeout_setting = NULL;
3571 int index = 0;
3572 long timeout;
3573 int ret = 0;
3574
3575 /*
67387dfe
AD
 3576 * By default the timeout for non-compute jobs is 10000
 3577 * and 60000 for compute jobs.
71f98027 3578 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3579 * jobs is 60000 by default.
71f98027
AD
3580 */
3581 adev->gfx_timeout = msecs_to_jiffies(10000);
3582 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3583 if (amdgpu_sriov_vf(adev))
3584 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3585 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3586 else
67387dfe 3587 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3588
f440ff44 3589 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3590 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3591 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3592 ret = kstrtol(timeout_setting, 0, &timeout);
3593 if (ret)
3594 return ret;
3595
3596 if (timeout == 0) {
3597 index++;
3598 continue;
3599 } else if (timeout < 0) {
3600 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3601 dev_warn(adev->dev, "lockup timeout disabled");
3602 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3603 } else {
3604 timeout = msecs_to_jiffies(timeout);
3605 }
3606
3607 switch (index++) {
3608 case 0:
3609 adev->gfx_timeout = timeout;
3610 break;
3611 case 1:
3612 adev->compute_timeout = timeout;
3613 break;
3614 case 2:
3615 adev->sdma_timeout = timeout;
3616 break;
3617 case 3:
3618 adev->video_timeout = timeout;
3619 break;
3620 default:
3621 break;
3622 }
3623 }
3624 /*
3625 * There is only one value specified and
3626 * it should apply to all non-compute jobs.
3627 */
bcccee89 3628 if (index == 1) {
71f98027 3629 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3630 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3631 adev->compute_timeout = adev->gfx_timeout;
3632 }
71f98027
AD
3633 }
3634
3635 return ret;
3636}
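/*
 * Example (hedged): the comma-separated form of the module parameter,
 * e.g. "amdgpu.lockup_timeout=10000,60000,10000,10000", is consumed in
 * the switch order above: gfx, compute, sdma, video (milliseconds).
 * A value of 0 keeps that ring type's default and a negative value
 * disables the timeout entirely.
 */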
d4535e2c 3637
4a74c38c
PY
3638/**
 3639 * amdgpu_device_check_iommu_direct_map - check if RAM is direct mapped to the GPU
 3640 *
 3641 * @adev: amdgpu_device pointer
 3642 *
 3643 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode.
3644 */
3645static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3646{
3647 struct iommu_domain *domain;
3648
3649 domain = iommu_get_domain_for_dev(adev->dev);
3650 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3651 adev->ram_is_direct_mapped = true;
3652}
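/*
 * Editor's note (hedged): with no IOMMU domain, or with an identity
 * ("passthrough") domain, DMA addresses equal physical addresses, so the
 * GPU effectively addresses system RAM directly; ram_is_direct_mapped
 * lets later code take that shortcut where it is safe to do so.
 */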
3653
77f3a5cd
ND
3654static const struct attribute *amdgpu_dev_attributes[] = {
3655 &dev_attr_product_name.attr,
3656 &dev_attr_product_number.attr,
3657 &dev_attr_serial_number.attr,
3658 &dev_attr_pcie_replay_count.attr,
3659 NULL
3660};
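/*
 * Hedged note: sysfs_create_files() later attaches these attributes to
 * the device kobject, so they typically show up under the PCI device
 * directory, e.g. /sys/bus/pci/devices/<bdf>/product_name (illustrative
 * path).
 */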
3661
d38ceaf9
AD
3662/**
3663 * amdgpu_device_init - initialize the driver
3664 *
3665 * @adev: amdgpu_device pointer
d38ceaf9
AD
3666 * @flags: driver flags
3667 *
3668 * Initializes the driver info and hw (all asics).
3669 * Returns 0 for success or an error on failure.
3670 * Called at driver startup.
3671 */
3672int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3673 uint32_t flags)
3674{
8aba21b7
LT
3675 struct drm_device *ddev = adev_to_drm(adev);
3676 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3677 int r, i;
b98c6299 3678 bool px = false;
95844d20 3679 u32 max_MBps;
59e9fff1 3680 int tmp;
d38ceaf9
AD
3681
3682 adev->shutdown = false;
d38ceaf9 3683 adev->flags = flags;
4e66d7d2
YZ
3684
3685 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3686 adev->asic_type = amdgpu_force_asic_type;
3687 else
3688 adev->asic_type = flags & AMD_ASIC_MASK;
3689
d38ceaf9 3690 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3691 if (amdgpu_emu_mode == 1)
8bdab6bb 3692 adev->usec_timeout *= 10;
770d13b1 3693 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3694 adev->accel_working = false;
3695 adev->num_rings = 0;
68ce8b24 3696 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3697 adev->mman.buffer_funcs = NULL;
3698 adev->mman.buffer_funcs_ring = NULL;
3699 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3700 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3701 adev->gmc.gmc_funcs = NULL;
7bd939d0 3702 adev->harvest_ip_mask = 0x0;
f54d1867 3703 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3704 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3705
3706 adev->smc_rreg = &amdgpu_invalid_rreg;
3707 adev->smc_wreg = &amdgpu_invalid_wreg;
3708 adev->pcie_rreg = &amdgpu_invalid_rreg;
3709 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3710 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3711 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3712 adev->pciep_rreg = &amdgpu_invalid_rreg;
3713 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3714 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3715 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3716 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3717 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3718 adev->didt_rreg = &amdgpu_invalid_rreg;
3719 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3720 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3721 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3722 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3723 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3724
3e39ab90
AD
3725 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3726 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3727 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3728
 3729 /* mutex initializations are all done here so we
 3730 * can recall functions without locking issues */
0e5ca0d1 3731 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3732 mutex_init(&adev->pm.mutex);
3733 mutex_init(&adev->gfx.gpu_clock_mutex);
3734 mutex_init(&adev->srbm_mutex);
b8866c26 3735 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3736 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3737 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3738 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3739 mutex_init(&adev->mn_lock);
e23b74aa 3740 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3741 hash_init(adev->mn_hash);
32eaeae0 3742 mutex_init(&adev->psp.mutex);
bd052211 3743 mutex_init(&adev->notifier_lock);
8cda7a4f 3744 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3745 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3746
ab3b9de6 3747 amdgpu_device_init_apu_flags(adev);
9f6a7857 3748
912dfc84
EQ
3749 r = amdgpu_device_check_arguments(adev);
3750 if (r)
3751 return r;
d38ceaf9 3752
d38ceaf9
AD
3753 spin_lock_init(&adev->mmio_idx_lock);
3754 spin_lock_init(&adev->smc_idx_lock);
3755 spin_lock_init(&adev->pcie_idx_lock);
3756 spin_lock_init(&adev->uvd_ctx_idx_lock);
3757 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3758 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3759 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3760 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3761 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3762
0c4e7fa5
CZ
3763 INIT_LIST_HEAD(&adev->shadow_list);
3764 mutex_init(&adev->shadow_list_lock);
3765
655ce9cb 3766 INIT_LIST_HEAD(&adev->reset_list);
3767
6492e1b0 3768 INIT_LIST_HEAD(&adev->ras_list);
3769
beff74bc
AD
3770 INIT_DELAYED_WORK(&adev->delayed_init_work,
3771 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3772 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3773 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3774
d4535e2c
AG
3775 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3776
d23ee13f 3777 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3778 adev->gfx.gfx_off_residency = 0;
3779 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3780 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3781
b265bdbd
EQ
3782 atomic_set(&adev->throttling_logging_enabled, 1);
3783 /*
3784 * If throttling continues, logging will be performed every minute
3785 * to avoid log flooding. "-1" is subtracted since the thermal
3786 * throttling interrupt comes every second. Thus, the total logging
 3787 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3788 * for throttling interrupt) = 60 seconds.
3789 */
3790 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3791 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3792
0fa49558
AX
3793 /* Registers mapping */
3794 /* TODO: block userspace mapping of io register */
da69c161
KW
3795 if (adev->asic_type >= CHIP_BONAIRE) {
3796 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3797 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3798 } else {
3799 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3800 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3801 }
d38ceaf9 3802
6c08e0ef
EQ
3803 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3804 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3805
d38ceaf9
AD
3806 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3807 if (adev->rmmio == NULL) {
3808 return -ENOMEM;
3809 }
3810 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3811 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3812
5494d864
AD
3813 amdgpu_device_get_pcie_info(adev);
3814
b239c017
JX
3815 if (amdgpu_mcbp)
3816 DRM_INFO("MCBP is enabled\n");
3817
436afdfa
PY
3818 /*
 3819 * Reset domain needs to be present early, before the XGMI hive is
 3820 * discovered (if any) and initialized, to use the reset sem and in_gpu
 3821 * reset flag early on during init and before calling RREG32.
3822 */
3823 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3824 if (!adev->reset_domain)
3825 return -ENOMEM;
3826
3aa0115d
ML
3827 /* detect hw virtualization here */
3828 amdgpu_detect_virtualization(adev);
3829
dffa11b4
ML
3830 r = amdgpu_device_get_job_timeout_settings(adev);
3831 if (r) {
3832 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3833 return r;
a190d1c7
XY
3834 }
3835
d38ceaf9 3836 /* early init functions */
06ec9070 3837 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3838 if (r)
4ef87d8f 3839 return r;
d38ceaf9 3840
b7cdb41e
ML
3841 /* Get rid of things like offb */
3842 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3843 if (r)
3844 return r;
3845
4d33e704
SK
3846 /* Enable TMZ based on IP_VERSION */
3847 amdgpu_gmc_tmz_set(adev);
3848
957b0787 3849 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
 3850 /* Need to get xgmi info early to decide the reset behavior */
3851 if (adev->gmc.xgmi.supported) {
3852 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3853 if (r)
3854 return r;
3855 }
3856
8e6d0b69 3857 /* enable PCIE atomic ops */
3858 if (amdgpu_sriov_vf(adev))
3859 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
e15c9d06 3860 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
8e6d0b69 3861 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
 3862 /* APUs with gfx9 onwards don't rely on PCIe atomics; rather, an
 3863 * internal path natively supports atomics, so set have_atomics_support to true.
3864 */
3865 else if ((adev->flags & AMD_IS_APU) &&
3866 (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)))
3867 adev->have_atomics_support = true;
8e6d0b69 3868 else
3869 adev->have_atomics_support =
3870 !pci_enable_atomic_ops_to_root(adev->pdev,
3871 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3872 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3873 if (!adev->have_atomics_support)
3874 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3875
6585661d
OZ
 3876 /* doorbell bar mapping and doorbell index init */
3877 amdgpu_device_doorbell_init(adev);
3878
9475a943
SL
3879 if (amdgpu_emu_mode == 1) {
3880 /* post the asic on emulation mode */
3881 emu_soc_asic_init(adev);
bfca0289 3882 goto fence_driver_init;
9475a943 3883 }
bfca0289 3884
04442bf7
LL
3885 amdgpu_reset_init(adev);
3886
4e99a44e
ML
3887 /* detect if we are with an SRIOV vbios */
3888 amdgpu_device_detect_sriov_bios(adev);
048765ad 3889
95e8e59e
AD
3890 /* check if we need to reset the asic
3891 * E.g., driver was not cleanly unloaded previously, etc.
3892 */
f14899fd 3893 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3894 if (adev->gmc.xgmi.num_physical_nodes) {
3895 dev_info(adev->dev, "Pending hive reset.\n");
3896 adev->gmc.xgmi.pending_reset = true;
 3897 /* Only need to init the necessary blocks for SMU to handle the reset */
3898 for (i = 0; i < adev->num_ip_blocks; i++) {
3899 if (!adev->ip_blocks[i].status.valid)
3900 continue;
3901 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3902 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3903 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3904 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3905 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3906 adev->ip_blocks[i].version->funcs->name);
3907 adev->ip_blocks[i].status.hw = true;
3908 }
3909 }
3910 } else {
59e9fff1 3911 tmp = amdgpu_reset_method;
3912 /* It should do a default reset when loading or reloading the driver,
3913 * regardless of the module parameter reset_method.
3914 */
3915 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3916 r = amdgpu_asic_reset(adev);
59e9fff1 3917 amdgpu_reset_method = tmp;
e3c1b071 3918 if (r) {
3919 dev_err(adev->dev, "asic reset on init failed\n");
3920 goto failed;
3921 }
95e8e59e
AD
3922 }
3923 }
3924
d38ceaf9 3925 /* Post card if necessary */
39c640c0 3926 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3927 if (!adev->bios) {
bec86378 3928 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3929 r = -EINVAL;
3930 goto failed;
d38ceaf9 3931 }
bec86378 3932 DRM_INFO("GPU posting now...\n");
4d2997ab 3933 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3934 if (r) {
3935 dev_err(adev->dev, "gpu post error!\n");
3936 goto failed;
3937 }
d38ceaf9
AD
3938 }
3939
88b64e95
AD
3940 if (adev->is_atom_fw) {
3941 /* Initialize clocks */
3942 r = amdgpu_atomfirmware_get_clock_info(adev);
3943 if (r) {
3944 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3945 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3946 goto failed;
3947 }
3948 } else {
a5bde2f9
AD
3949 /* Initialize clocks */
3950 r = amdgpu_atombios_get_clock_info(adev);
3951 if (r) {
3952 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3953 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3954 goto failed;
a5bde2f9
AD
3955 }
3956 /* init i2c buses */
4562236b
HW
3957 if (!amdgpu_device_has_dc_support(adev))
3958 amdgpu_atombios_i2c_init(adev);
2c1a2784 3959 }
d38ceaf9 3960
bfca0289 3961fence_driver_init:
d38ceaf9 3962 /* Fence driver */
067f44c8 3963 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3964 if (r) {
067f44c8 3965 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3966 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3967 goto failed;
2c1a2784 3968 }
d38ceaf9
AD
3969
3970 /* init the mode config */
4a580877 3971 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3972
06ec9070 3973 r = amdgpu_device_ip_init(adev);
d38ceaf9 3974 if (r) {
06ec9070 3975 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3976 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3977 goto release_ras_con;
d38ceaf9
AD
3978 }
3979
8d35a259
LG
3980 amdgpu_fence_driver_hw_init(adev);
3981
d69b8971
YZ
3982 dev_info(adev->dev,
3983 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3984 adev->gfx.config.max_shader_engines,
3985 adev->gfx.config.max_sh_per_se,
3986 adev->gfx.config.max_cu_per_sh,
3987 adev->gfx.cu_info.number);
3988
d38ceaf9
AD
3989 adev->accel_working = true;
3990
e59c0205
AX
3991 amdgpu_vm_check_compute_bug(adev);
3992
95844d20
MO
3993 /* Initialize the buffer migration limit. */
3994 if (amdgpu_moverate >= 0)
3995 max_MBps = amdgpu_moverate;
3996 else
3997 max_MBps = 8; /* Allow 8 MB/s. */
3998 /* Get a log2 for easy divisions. */
3999 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4000
d2f52ac8 4001 r = amdgpu_pm_sysfs_init(adev);
53e9d836
GC
4002 if (r)
4003 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
d2f52ac8 4004
5bb23532 4005 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
4006 if (r) {
4007 adev->ucode_sysfs_en = false;
5bb23532 4008 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
4009 } else
4010 adev->ucode_sysfs_en = true;
5bb23532 4011
8424f2cc
LG
4012 r = amdgpu_psp_sysfs_init(adev);
4013 if (r) {
4014 adev->psp_sysfs_en = false;
4015 if (!amdgpu_sriov_vf(adev))
4016 DRM_ERROR("Creating psp sysfs failed\n");
4017 } else
4018 adev->psp_sysfs_en = true;
4019
b0adca4d
EQ
4020 /*
4021 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 4022 * Otherwise the mgpu fan boost feature will be skipped because the
 4023 * gpu instance would not yet be counted.
4024 */
4025 amdgpu_register_gpu_instance(adev);
4026
d38ceaf9
AD
 4027 /* enable clockgating, etc., after ib tests, since some blocks require
4028 * explicit gating rather than handling it automatically.
4029 */
e3c1b071 4030 if (!adev->gmc.xgmi.pending_reset) {
4031 r = amdgpu_device_ip_late_init(adev);
4032 if (r) {
4033 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4034 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 4035 goto release_ras_con;
e3c1b071 4036 }
4037 /* must succeed. */
4038 amdgpu_ras_resume(adev);
4039 queue_delayed_work(system_wq, &adev->delayed_init_work,
4040 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 4041 }
d38ceaf9 4042
38eecbe0
CL
4043 if (amdgpu_sriov_vf(adev)) {
4044 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4045 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4046 }
2c738637 4047
77f3a5cd 4048 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4049 if (r)
77f3a5cd 4050 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4051
d155bef0
AB
4052 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4053 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4054 if (r)
4055 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4056
c1dd4aa6
AG
4057 /* Have stored pci confspace at hand for restore in sudden PCI error */
4058 if (amdgpu_device_cache_pci_state(adev->pdev))
4059 pci_restore_state(pdev);
4060
8c3dd61c
KHF
 4061 /* if we have more than one VGA card, then disable the amdgpu VGA resources */
4062 /* this will fail for cards that aren't VGA class devices, just
4063 * ignore it */
4064 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4065 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4066
d37a3929
OC
4067 px = amdgpu_device_supports_px(ddev);
4068
4069 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4070 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4071 vga_switcheroo_register_client(adev->pdev,
4072 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4073
4074 if (px)
8c3dd61c 4075 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4076
e3c1b071 4077 if (adev->gmc.xgmi.pending_reset)
4078 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4079 msecs_to_jiffies(AMDGPU_RESUME_MS));
4080
4a74c38c
PY
4081 amdgpu_device_check_iommu_direct_map(adev);
4082
d38ceaf9 4083 return 0;
83ba126a 4084
970fd197 4085release_ras_con:
38eecbe0
CL
4086 if (amdgpu_sriov_vf(adev))
4087 amdgpu_virt_release_full_gpu(adev, true);
4088
4089 /* failed in exclusive mode due to timeout */
4090 if (amdgpu_sriov_vf(adev) &&
4091 !amdgpu_sriov_runtime(adev) &&
4092 amdgpu_virt_mmio_blocked(adev) &&
4093 !amdgpu_virt_wait_reset(adev)) {
4094 dev_err(adev->dev, "VF exclusive mode timeout\n");
4095 /* Don't send request since VF is inactive. */
4096 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4097 adev->virt.ops = NULL;
4098 r = -EAGAIN;
4099 }
970fd197
SY
4100 amdgpu_release_ras_context(adev);
4101
83ba126a 4102failed:
89041940 4103 amdgpu_vf_error_trans_all(adev);
8840a387 4104
83ba126a 4105 return r;
d38ceaf9
AD
4106}
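/*
 * Editor's ordering note (hedged): at a high level the init above runs
 * register mapping -> early IP init -> optional reset/post -> clock and
 * i2c setup -> fence driver -> full IP init -> pm/sysfs registration ->
 * late IP init; the fini paths below unwind it across fini_hw()/fini_sw().
 */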
4107
07775fc1
AG
4108static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4109{
62d5f9f7 4110
07775fc1
AG
4111 /* Clear all CPU mappings pointing to this device */
4112 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4113
4114 /* Unmap all mapped bars - Doorbell, registers and VRAM */
4115 amdgpu_device_doorbell_fini(adev);
4116
4117 iounmap(adev->rmmio);
4118 adev->rmmio = NULL;
4119 if (adev->mman.aper_base_kaddr)
4120 iounmap(adev->mman.aper_base_kaddr);
4121 adev->mman.aper_base_kaddr = NULL;
4122
4123 /* Memory manager related */
4124 if (!adev->gmc.xgmi.connected_to_cpu) {
4125 arch_phys_wc_del(adev->gmc.vram_mtrr);
4126 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4127 }
4128}
4129
d38ceaf9 4130/**
bbe04dec 4131 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4132 *
4133 * @adev: amdgpu_device pointer
4134 *
4135 * Tear down the driver info (all asics).
4136 * Called at driver shutdown.
4137 */
72c8c97b 4138void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4139{
aac89168 4140 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4141 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4142 adev->shutdown = true;
9f875167 4143
752c683d
ML
 4144 /* make sure IB test finished before entering exclusive mode
 4145 * to avoid preemption on IB test
 4146 */
519b8b76 4147 if (amdgpu_sriov_vf(adev)) {
752c683d 4148 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4149 amdgpu_virt_fini_data_exchange(adev);
4150 }
752c683d 4151
e5b03032
ML
4152 /* disable all interrupts */
4153 amdgpu_irq_disable_all(adev);
47fc644f 4154 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4155 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4156 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4157 else
4a580877 4158 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4159 }
8d35a259 4160 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4161
cd3a8a59 4162 if (adev->mman.initialized)
9bff18d1 4163 drain_workqueue(adev->mman.bdev.wq);
98f56188 4164
53e9d836 4165 if (adev->pm.sysfs_initialized)
7c868b59 4166 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4167 if (adev->ucode_sysfs_en)
4168 amdgpu_ucode_sysfs_fini(adev);
8424f2cc
LG
4169 if (adev->psp_sysfs_en)
4170 amdgpu_psp_sysfs_fini(adev);
72c8c97b
AG
4171 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4172
232d1d43
SY
 4173 /* disabling the ras feature must come before hw fini */
4174 amdgpu_ras_pre_fini(adev);
4175
e9669fb7 4176 amdgpu_device_ip_fini_early(adev);
d10d0daa 4177
a3848df6
YW
4178 amdgpu_irq_fini_hw(adev);
4179
b6fd6e0f
SK
4180 if (adev->mman.initialized)
4181 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4182
d10d0daa 4183 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4184
39934d3e
VP
4185 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4186 amdgpu_device_unmap_mmio(adev);
87172e89 4187
72c8c97b
AG
4188}
4189
4190void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4191{
62d5f9f7 4192 int idx;
d37a3929 4193 bool px;
62d5f9f7 4194
8d35a259 4195 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4196 amdgpu_device_ip_fini(adev);
b31d3063 4197 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4198 adev->accel_working = false;
68ce8b24 4199 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4200
4201 amdgpu_reset_fini(adev);
4202
d38ceaf9 4203 /* free i2c buses */
4562236b
HW
4204 if (!amdgpu_device_has_dc_support(adev))
4205 amdgpu_i2c_fini(adev);
bfca0289
SL
4206
4207 if (amdgpu_emu_mode != 1)
4208 amdgpu_atombios_fini(adev);
4209
d38ceaf9
AD
4210 kfree(adev->bios);
4211 adev->bios = NULL;
d37a3929
OC
4212
4213 px = amdgpu_device_supports_px(adev_to_drm(adev));
4214
4215 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4216 apple_gmux_detect(NULL, NULL)))
84c8b22e 4217 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4218
4219 if (px)
83ba126a 4220 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4221
38d6be81 4222 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4223 vga_client_unregister(adev->pdev);
e9bc1bf7 4224
62d5f9f7
LS
4225 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4226
4227 iounmap(adev->rmmio);
4228 adev->rmmio = NULL;
4229 amdgpu_device_doorbell_fini(adev);
4230 drm_dev_exit(idx);
4231 }
4232
d155bef0
AB
4233 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4234 amdgpu_pmu_fini(adev);
72de33f8 4235 if (adev->mman.discovery_bin)
a190d1c7 4236 amdgpu_discovery_fini(adev);
72c8c97b 4237
cfbb6b00
AG
4238 amdgpu_reset_put_reset_domain(adev->reset_domain);
4239 adev->reset_domain = NULL;
4240
72c8c97b
AG
4241 kfree(adev->pci_state);
4242
d38ceaf9
AD
4243}
4244
58144d28
ND
4245/**
4246 * amdgpu_device_evict_resources - evict device resources
4247 * @adev: amdgpu device object
4248 *
 4249 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4250 * of the vram memory type. Mainly used for evicting device resources
4251 * at suspend time.
4252 *
4253 */
7863c155 4254static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4255{
7863c155
ML
4256 int ret;
4257
e53d9665
ML
4258 /* No need to evict vram on APUs for suspend to ram or s2idle */
4259 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4260 return 0;
58144d28 4261
7863c155
ML
4262 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4263 if (ret)
58144d28 4264 DRM_WARN("evicting device resources failed\n");
7863c155 4265 return ret;
58144d28 4266}
d38ceaf9
AD
4267
4268/*
4269 * Suspend & resume.
4270 */
4271/**
810ddc3a 4272 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4273 *
87e3f136 4274 * @dev: drm dev pointer
87e3f136 4275 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
4276 *
4277 * Puts the hw in the suspend state (all asics).
4278 * Returns 0 for success or an error on failure.
4279 * Called at driver suspend.
4280 */
de185019 4281int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4282{
a2e15b0e 4283 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4284 int r = 0;
d38ceaf9 4285
d38ceaf9
AD
4286 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4287 return 0;
4288
44779b43 4289 adev->in_suspend = true;
3fa8f89d 4290
47ea2076
SF
 4291 /* Evict the majority of BOs before grabbing full access */
4292 r = amdgpu_device_evict_resources(adev);
4293 if (r)
4294 return r;
4295
d7274ec7
BZ
4296 if (amdgpu_sriov_vf(adev)) {
4297 amdgpu_virt_fini_data_exchange(adev);
4298 r = amdgpu_virt_request_full_gpu(adev, false);
4299 if (r)
4300 return r;
4301 }
4302
3fa8f89d
S
4303 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4304 DRM_WARN("smart shift update failed\n");
4305
5f818173 4306 if (fbcon)
087451f3 4307 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4308
beff74bc 4309 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 4310
5e6932fe 4311 amdgpu_ras_suspend(adev);
4312
2196927b 4313 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4314
c004d44e 4315 if (!adev->in_s0ix)
5d3a2d95 4316 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4317
7863c155
ML
4318 r = amdgpu_device_evict_resources(adev);
4319 if (r)
4320 return r;
d38ceaf9 4321
8d35a259 4322 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4323
2196927b 4324 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4325
d7274ec7
BZ
4326 if (amdgpu_sriov_vf(adev))
4327 amdgpu_virt_release_full_gpu(adev, false);
4328
d38ceaf9
AD
4329 return 0;
4330}
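/*
 * Editor's note (hedged): eviction intentionally runs twice above - the
 * first pass moves the bulk of BOs before SR-IOV full access is taken,
 * and the second pass, after phase1 suspend, catches whatever remains
 * before VRAM may be powered down.
 */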
4331
4332/**
810ddc3a 4333 * amdgpu_device_resume - initiate device resume
d38ceaf9 4334 *
87e3f136 4335 * @dev: drm dev pointer
87e3f136 4336 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
4337 *
4338 * Bring the hw back to operating state (all asics).
4339 * Returns 0 for success or an error on failure.
4340 * Called at driver resume.
4341 */
de185019 4342int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4343{
1348969a 4344 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4345 int r = 0;
d38ceaf9 4346
d7274ec7
BZ
4347 if (amdgpu_sriov_vf(adev)) {
4348 r = amdgpu_virt_request_full_gpu(adev, true);
4349 if (r)
4350 return r;
4351 }
4352
d38ceaf9
AD
4353 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4354 return 0;
4355
62498733 4356 if (adev->in_s0ix)
bc143d8b 4357 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4358
d38ceaf9 4359 /* post card */
39c640c0 4360 if (amdgpu_device_need_post(adev)) {
4d2997ab 4361 r = amdgpu_device_asic_init(adev);
74b0b157 4362 if (r)
aac89168 4363 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4364 }
d38ceaf9 4365
06ec9070 4366 r = amdgpu_device_ip_resume(adev);
d7274ec7 4367
e6707218 4368 if (r) {
aac89168 4369 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4370 goto exit;
e6707218 4371 }
8d35a259 4372 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4373
06ec9070 4374 r = amdgpu_device_ip_late_init(adev);
03161a6e 4375 if (r)
3c22c1ea 4376 goto exit;
d38ceaf9 4377
beff74bc
AD
4378 queue_delayed_work(system_wq, &adev->delayed_init_work,
4379 msecs_to_jiffies(AMDGPU_RESUME_MS));
4380
c004d44e 4381 if (!adev->in_s0ix) {
5d3a2d95
AD
4382 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4383 if (r)
3c22c1ea 4384 goto exit;
5d3a2d95 4385 }
756e6880 4386
3c22c1ea
SF
4387exit:
4388 if (amdgpu_sriov_vf(adev)) {
4389 amdgpu_virt_init_data_exchange(adev);
4390 amdgpu_virt_release_full_gpu(adev, true);
4391 }
4392
4393 if (r)
4394 return r;
4395
96a5d8d4 4396 /* Make sure IB tests flushed */
beff74bc 4397 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4398
a2e15b0e 4399 if (fbcon)
087451f3 4400 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4401
5e6932fe 4402 amdgpu_ras_resume(adev);
4403
d09ef243
AD
4404 if (adev->mode_info.num_crtc) {
4405 /*
4406 * Most of the connector probing functions try to acquire runtime pm
4407 * refs to ensure that the GPU is powered on when connector polling is
4408 * performed. Since we're calling this from a runtime PM callback,
4409 * trying to acquire rpm refs will cause us to deadlock.
4410 *
4411 * Since we're guaranteed to be holding the rpm lock, it's safe to
4412 * temporarily disable the rpm helpers so this doesn't deadlock us.
4413 */
23a1a9e5 4414#ifdef CONFIG_PM
d09ef243 4415 dev->dev->power.disable_depth++;
23a1a9e5 4416#endif
d09ef243
AD
4417 if (!adev->dc_enabled)
4418 drm_helper_hpd_irq_event(dev);
4419 else
4420 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4421#ifdef CONFIG_PM
d09ef243 4422 dev->dev->power.disable_depth--;
23a1a9e5 4423#endif
d09ef243 4424 }
44779b43
RZ
4425 adev->in_suspend = false;
4426
dc907c9d
JX
4427 if (adev->enable_mes)
4428 amdgpu_mes_self_test(adev);
4429
3fa8f89d
S
4430 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4431 DRM_WARN("smart shift update failed\n");
4432
4d3b9ae5 4433 return 0;
d38ceaf9
AD
4434}
4435
e3ecdffa
AD
4436/**
4437 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4438 *
4439 * @adev: amdgpu_device pointer
4440 *
4441 * The list of all the hardware IPs that make up the asic is walked and
4442 * the check_soft_reset callbacks are run. check_soft_reset determines
4443 * if the asic is still hung or not.
4444 * Returns true if any of the IPs are still in a hung state, false if not.
4445 */
06ec9070 4446static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4447{
4448 int i;
4449 bool asic_hang = false;
4450
f993d628
ML
4451 if (amdgpu_sriov_vf(adev))
4452 return true;
4453
8bc04c29
AD
4454 if (amdgpu_asic_need_full_reset(adev))
4455 return true;
4456
63fbf42f 4457 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4458 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4459 continue;
a1255107
AD
4460 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4461 adev->ip_blocks[i].status.hang =
4462 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4463 if (adev->ip_blocks[i].status.hang) {
aac89168 4464 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4465 asic_hang = true;
4466 }
4467 }
4468 return asic_hang;
4469}
4470
e3ecdffa
AD
4471/**
4472 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4473 *
4474 * @adev: amdgpu_device pointer
4475 *
4476 * The list of all the hardware IPs that make up the asic is walked and the
4477 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4478 * handles any IP specific hardware or software state changes that are
4479 * necessary for a soft reset to succeed.
4480 * Returns 0 on success, negative error code on failure.
4481 */
06ec9070 4482static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4483{
4484 int i, r = 0;
4485
4486 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4487 if (!adev->ip_blocks[i].status.valid)
d31a501e 4488 continue;
a1255107
AD
4489 if (adev->ip_blocks[i].status.hang &&
4490 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4491 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4492 if (r)
4493 return r;
4494 }
4495 }
4496
4497 return 0;
4498}
4499
e3ecdffa
AD
4500/**
4501 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4502 *
4503 * @adev: amdgpu_device pointer
4504 *
4505 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4506 * reset is necessary to recover.
4507 * Returns true if a full asic reset is required, false if not.
4508 */
06ec9070 4509static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4510{
da146d3b
AD
4511 int i;
4512
8bc04c29
AD
4513 if (amdgpu_asic_need_full_reset(adev))
4514 return true;
4515
da146d3b 4516 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4517 if (!adev->ip_blocks[i].status.valid)
da146d3b 4518 continue;
a1255107
AD
4519 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4520 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4521 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4522 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4523 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4524 if (adev->ip_blocks[i].status.hang) {
aac89168 4525 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4526 return true;
4527 }
4528 }
35d782fe
CZ
4529 }
4530 return false;
4531}
4532
e3ecdffa
AD
4533/**
4534 * amdgpu_device_ip_soft_reset - do a soft reset
4535 *
4536 * @adev: amdgpu_device pointer
4537 *
4538 * The list of all the hardware IPs that make up the asic is walked and the
4539 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4540 * IP specific hardware or software state changes that are necessary to soft
4541 * reset the IP.
4542 * Returns 0 on success, negative error code on failure.
4543 */
06ec9070 4544static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4545{
4546 int i, r = 0;
4547
4548 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4549 if (!adev->ip_blocks[i].status.valid)
35d782fe 4550 continue;
a1255107
AD
4551 if (adev->ip_blocks[i].status.hang &&
4552 adev->ip_blocks[i].version->funcs->soft_reset) {
4553 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4554 if (r)
4555 return r;
4556 }
4557 }
4558
4559 return 0;
4560}
4561
e3ecdffa
AD
4562/**
4563 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4564 *
4565 * @adev: amdgpu_device pointer
4566 *
4567 * The list of all the hardware IPs that make up the asic is walked and the
4568 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4569 * handles any IP specific hardware or software state changes that are
4570 * necessary after the IP has been soft reset.
4571 * Returns 0 on success, negative error code on failure.
4572 */
06ec9070 4573static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4574{
4575 int i, r = 0;
4576
4577 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4578 if (!adev->ip_blocks[i].status.valid)
35d782fe 4579 continue;
a1255107
AD
4580 if (adev->ip_blocks[i].status.hang &&
4581 adev->ip_blocks[i].version->funcs->post_soft_reset)
4582 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4583 if (r)
4584 return r;
4585 }
4586
4587 return 0;
4588}
4589
e3ecdffa 4590/**
c33adbc7 4591 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4592 *
4593 * @adev: amdgpu_device pointer
4594 *
4595 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4596 * restore things like GPUVM page tables after a GPU reset where
4597 * the contents of VRAM might be lost.
403009bf
CK
4598 *
4599 * Returns:
4600 * 0 on success, negative error code on failure.
e3ecdffa 4601 */
c33adbc7 4602static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4603{
c41d1cf6 4604 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4605 struct amdgpu_bo *shadow;
e18aaea7 4606 struct amdgpu_bo_vm *vmbo;
403009bf 4607 long r = 1, tmo;
c41d1cf6
ML
4608
4609 if (amdgpu_sriov_runtime(adev))
b045d3af 4610 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4611 else
4612 tmo = msecs_to_jiffies(100);
4613
aac89168 4614 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4615 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4616 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
 4617 /* If the vm is a compute context or the adev is an APU, shadow will be NULL */
4618 if (!vmbo->shadow)
4619 continue;
4620 shadow = vmbo->shadow;
4621
403009bf 4622 /* No need to recover an evicted BO */
d3116756
CK
4623 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4624 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4625 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4626 continue;
4627
4628 r = amdgpu_bo_restore_shadow(shadow, &next);
4629 if (r)
4630 break;
4631
c41d1cf6 4632 if (fence) {
1712fb1a 4633 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4634 dma_fence_put(fence);
4635 fence = next;
1712fb1a 4636 if (tmo == 0) {
4637 r = -ETIMEDOUT;
c41d1cf6 4638 break;
1712fb1a 4639 } else if (tmo < 0) {
4640 r = tmo;
4641 break;
4642 }
403009bf
CK
4643 } else {
4644 fence = next;
c41d1cf6 4645 }
c41d1cf6
ML
4646 }
4647 mutex_unlock(&adev->shadow_list_lock);
4648
403009bf
CK
4649 if (fence)
4650 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4651 dma_fence_put(fence);
4652
1712fb1a 4653 if (r < 0 || tmo <= 0) {
aac89168 4654 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4655 return -EIO;
4656 }
c41d1cf6 4657
aac89168 4658 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4659 return 0;
c41d1cf6
ML
4660}
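/*
 * Background sketch (editor's note): page-table BOs in VRAM may carry a
 * GTT "shadow" copy; amdgpu_bo_restore_shadow() queues a copy back into
 * VRAM and hands back its fence. Because the remaining timeout is carried
 * from one dma_fence_wait_timeout() call to the next, the whole restore
 * is bounded by the initial tmo budget.
 */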
4661
a90ad3c2 4662
e3ecdffa 4663/**
06ec9070 4664 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4665 *
982a820b 4666 * @adev: amdgpu_device pointer
87e3f136 4667 * @from_hypervisor: request from hypervisor
5740682e
ML
4668 *
 4669 * Do a VF FLR and reinitialize the ASIC.
3f48c681 4670 * Returns 0 on success, otherwise failure.
e3ecdffa
AD
4671 */
4672static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4673 bool from_hypervisor)
5740682e
ML
4674{
4675 int r;
a5f67c93 4676 struct amdgpu_hive_info *hive = NULL;
7258fa31 4677 int retry_limit = 0;
5740682e 4678
7258fa31 4679retry:
c004d44e 4680 amdgpu_amdkfd_pre_reset(adev);
428890a3 4681
5740682e
ML
4682 if (from_hypervisor)
4683 r = amdgpu_virt_request_full_gpu(adev, true);
4684 else
4685 r = amdgpu_virt_reset_gpu(adev);
4686 if (r)
4687 return r;
a90ad3c2
ML
4688
4689 /* Resume IP prior to SMC */
06ec9070 4690 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4691 if (r)
4692 goto error;
a90ad3c2 4693
c9ffa427 4694 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4695
7a3e0bb2
RZ
4696 r = amdgpu_device_fw_loading(adev);
4697 if (r)
4698 return r;
4699
a90ad3c2 4700 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4701 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4702 if (r)
4703 goto error;
a90ad3c2 4704
a5f67c93
ZL
4705 hive = amdgpu_get_xgmi_hive(adev);
4706 /* Update PSP FW topology after reset */
4707 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4708 r = amdgpu_xgmi_update_topology(hive, adev);
4709
4710 if (hive)
4711 amdgpu_put_xgmi_hive(hive);
4712
4713 if (!r) {
4714 amdgpu_irq_gpu_reset_resume_helper(adev);
4715 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4716
c004d44e 4717 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4718 }
a90ad3c2 4719
abc34253 4720error:
c41d1cf6 4721 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4722 amdgpu_inc_vram_lost(adev);
c33adbc7 4723 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4724 }
437f3e0b 4725 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4726
7258fa31
SK
4727 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4728 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4729 retry_limit++;
4730 goto retry;
4731 } else
4732 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4733 }
4734
a90ad3c2
ML
4735 return r;
4736}
4737
9a1cddd6 4738/**
 4739 * amdgpu_device_has_job_running - check if there is any job in the pending list
 4740 *
982a820b 4741 * @adev: amdgpu_device pointer
9a1cddd6 4742 *
 4743 * Check if there is any job in the pending list.
4744 */
4745bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4746{
4747 int i;
4748 struct drm_sched_job *job;
4749
4750 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4751 struct amdgpu_ring *ring = adev->rings[i];
4752
4753 if (!ring || !ring->sched.thread)
4754 continue;
4755
4756 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4757 job = list_first_entry_or_null(&ring->sched.pending_list,
4758 struct drm_sched_job, list);
9a1cddd6 4759 spin_unlock(&ring->sched.job_list_lock);
4760 if (job)
4761 return true;
4762 }
4763 return false;
4764}
4765
12938fad
CK
4766/**
4767 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4768 *
982a820b 4769 * @adev: amdgpu_device pointer
12938fad
CK
4770 *
4771 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4772 * a hung GPU.
4773 */
4774bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4775{
12938fad 4776
3ba7b418
AG
4777 if (amdgpu_gpu_recovery == 0)
4778 goto disabled;
4779
1a11a65d
YC
4780 /* Skip soft reset check in fatal error mode */
4781 if (!amdgpu_ras_is_poison_mode_supported(adev))
4782 return true;
4783
3ba7b418
AG
4784 if (amdgpu_sriov_vf(adev))
4785 return true;
4786
4787 if (amdgpu_gpu_recovery == -1) {
4788 switch (adev->asic_type) {
b3523c45
AD
4789#ifdef CONFIG_DRM_AMDGPU_SI
4790 case CHIP_VERDE:
4791 case CHIP_TAHITI:
4792 case CHIP_PITCAIRN:
4793 case CHIP_OLAND:
4794 case CHIP_HAINAN:
4795#endif
4796#ifdef CONFIG_DRM_AMDGPU_CIK
4797 case CHIP_KAVERI:
4798 case CHIP_KABINI:
4799 case CHIP_MULLINS:
4800#endif
4801 case CHIP_CARRIZO:
4802 case CHIP_STONEY:
4803 case CHIP_CYAN_SKILLFISH:
3ba7b418 4804 goto disabled;
b3523c45
AD
4805 default:
4806 break;
3ba7b418 4807 }
12938fad
CK
4808 }
4809
4810 return true;
3ba7b418
AG
4811
4812disabled:
aac89168 4813 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4814 return false;
12938fad
CK
4815}
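/*
 * Hedged usage note: amdgpu_gpu_recovery is the usual tri-state module
 * parameter handled above: 0 disables recovery, a positive value forces
 * it on, and -1 ("auto") enables it everywhere except the ASICs listed
 * in the switch. A hypothetical caller:
 *
 *	if (amdgpu_device_should_recover_gpu(adev))
 *		... schedule the reset/recovery work ...
 */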
4816
5c03e584
FX
4817int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4818{
47fc644f
SS
4819 u32 i;
4820 int ret = 0;
5c03e584 4821
47fc644f 4822 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4823
47fc644f 4824 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4825
47fc644f
SS
4826 /* disable BM */
4827 pci_clear_master(adev->pdev);
5c03e584 4828
47fc644f 4829 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4830
47fc644f
SS
4831 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4832 dev_info(adev->dev, "GPU smu mode1 reset\n");
4833 ret = amdgpu_dpm_mode1_reset(adev);
4834 } else {
4835 dev_info(adev->dev, "GPU psp mode1 reset\n");
4836 ret = psp_gpu_reset(adev);
4837 }
5c03e584 4838
47fc644f
SS
4839 if (ret)
4840 dev_err(adev->dev, "GPU mode1 reset failed\n");
5c03e584 4841
47fc644f 4842 amdgpu_device_load_pci_state(adev->pdev);
5c03e584 4843
47fc644f
SS
4844 /* wait for asic to come out of reset */
4845 for (i = 0; i < adev->usec_timeout; i++) {
4846 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4847
47fc644f
SS
4848 if (memsize != 0xffffffff)
4849 break;
4850 udelay(1);
4851 }
5c03e584 4852
47fc644f
SS
4853 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4854 return ret;
5c03e584 4855}
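/*
 * Editor's note (hedged): the memsize poll above acts as a liveness
 * probe - the register reads back 0xffffffff while the ASIC is still in
 * reset, so the first sane value signals that MMIO is reachable again.
 */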
5c6dd71e 4856
e3c1b071 4857int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4858 struct amdgpu_reset_context *reset_context)
26bc5340 4859{
5c1e6fa4 4860 int i, r = 0;
04442bf7
LL
4861 struct amdgpu_job *job = NULL;
4862 bool need_full_reset =
4863 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4864
4865 if (reset_context->reset_req_dev == adev)
4866 job = reset_context->job;
71182665 4867
b602ca5f
TZ
4868 if (amdgpu_sriov_vf(adev)) {
4869 /* stop the data exchange thread */
4870 amdgpu_virt_fini_data_exchange(adev);
4871 }
4872
9e225fb9
AG
4873 amdgpu_fence_driver_isr_toggle(adev, true);
4874
71182665 4875 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4876 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4877 struct amdgpu_ring *ring = adev->rings[i];
4878
51687759 4879 if (!ring || !ring->sched.thread)
0875dc9e 4880 continue;
5740682e 4881
c530b02f
JZ
 4882 /* clear job fences from fence drv to avoid force_completion;
 4883 * leave NULL and vm flush fences in fence drv */
5c1e6fa4 4884 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4885
2f9d4084
ML
4886 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4887 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4888 }
d38ceaf9 4889
9e225fb9
AG
4890 amdgpu_fence_driver_isr_toggle(adev, false);
4891
ff99849b 4892 if (job && job->vm)
222b5f04
AG
4893 drm_sched_increase_karma(&job->base);
4894
04442bf7 4895 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b
LL
4896 /* If reset handler not implemented, continue; otherwise return */
4897 if (r == -ENOSYS)
4898 r = 0;
4899 else
04442bf7
LL
4900 return r;
4901
1d721ed6 4902 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4903 if (!amdgpu_sriov_vf(adev)) {
4904
4905 if (!need_full_reset)
4906 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4907
360cd081
LG
4908 if (!need_full_reset && amdgpu_gpu_recovery &&
4909 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4910 amdgpu_device_ip_pre_soft_reset(adev);
4911 r = amdgpu_device_ip_soft_reset(adev);
4912 amdgpu_device_ip_post_soft_reset(adev);
4913 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4914 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4915 need_full_reset = true;
4916 }
4917 }
4918
4919 if (need_full_reset)
4920 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4921 if (need_full_reset)
4922 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4923 else
4924 clear_bit(AMDGPU_NEED_FULL_RESET,
4925 &reset_context->flags);
26bc5340
AG
4926 }
4927
4928 return r;
4929}
4930
15fd09a0
SA
4931static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4932{
15fd09a0
SA
4933 int i;
4934
38a15ad9 4935 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4936
4937 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4938 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4939 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4940 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4941 }
4942
4943 return 0;
4944}

#ifdef CONFIG_DEV_COREDUMP
static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
				       size_t count, void *data, size_t datalen)
{
	struct drm_printer p;
	struct amdgpu_device *adev = data;
	struct drm_print_iterator iter;
	int i;

	iter.data = buffer;
	iter.offset = 0;
	iter.start = offset;
	iter.remain = count;

	p = drm_coredump_printer(&iter);

	drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
	drm_printf(&p, "kernel: " UTS_RELEASE "\n");
	drm_printf(&p, "module: " KBUILD_MODNAME "\n");
	drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
	if (adev->reset_task_info.pid)
		drm_printf(&p, "process_name: %s PID: %d\n",
			   adev->reset_task_info.process_name,
			   adev->reset_task_info.pid);

	if (adev->reset_vram_lost)
		drm_printf(&p, "VRAM is lost due to GPU reset!\n");
	if (adev->num_regs) {
		drm_printf(&p, "AMDGPU register dumps:\nOffset:     Value:\n");

		for (i = 0; i < adev->num_regs; i++)
			drm_printf(&p, "0x%08x: 0x%08x\n",
				   adev->reset_dump_reg_list[i],
				   adev->reset_dump_reg_value[i]);
	}

	return count - iter.remain;
}

static void amdgpu_devcoredump_free(void *data)
{
}

static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
{
	struct drm_device *dev = adev_to_drm(adev);

	ktime_get_ts64(&adev->reset_time);
	dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
		      amdgpu_devcoredump_read, amdgpu_devcoredump_free);
}
#endif
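
/*
 * Illustrative note (node names vary by system; this is not code from this
 * file): after a reset, the dump registered by
 * amdgpu_reset_capture_coredumpm() can be fetched from userspace through
 * the devcoredump class device, roughly:
 *
 *	cat /sys/class/devcoredump/devcd<N>/data
 *
 * Writing anything to that file releases the dump early; otherwise the
 * devcoredump core frees it itself after a timeout.
 */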

int amdgpu_do_asic_reset(struct list_head *device_list_handle,
			 struct amdgpu_reset_context *reset_context)
{
	struct amdgpu_device *tmp_adev = NULL;
	bool need_full_reset, skip_hw_reset, vram_lost = false;
	int r = 0;
	bool gpu_reset_for_dev_remove = false;

	/* Try reset handler method first */
	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
				    reset_list);
	amdgpu_reset_reg_dumps(tmp_adev);

	reset_context->reset_device_list = device_list_handle;
	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
	/* If reset handler not implemented, continue; otherwise return */
	if (r == -ENOSYS)
		r = 0;
	else
		return r;

	/* Reset handler not implemented, use the default method */
	need_full_reset =
		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);

	gpu_reset_for_dev_remove =
		test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);

	/*
	 * ASIC reset has to be done on all XGMI hive nodes ASAP
	 * to allow proper links negotiation in FW (within 1 sec)
	 */
	if (!skip_hw_reset && need_full_reset) {
		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
			/* For XGMI run all resets in parallel to speed up the process */
			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
				tmp_adev->gmc.xgmi.pending_reset = false;
				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
					r = -EALREADY;
			} else
				r = amdgpu_asic_reset(tmp_adev);

			if (r) {
				dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
					r, adev_to_drm(tmp_adev)->unique);
				break;
			}
		}

		/* For XGMI wait for all resets to complete before proceed */
		if (!r) {
			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
					flush_work(&tmp_adev->xgmi_reset_work);
					r = tmp_adev->asic_reset_res;
					if (r)
						break;
				}
			}
		}
	}

	if (!r && amdgpu_ras_intr_triggered()) {
		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
			if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
			    tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
				tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
		}

		amdgpu_ras_intr_cleared();
	}

	/* Since the mode1 reset affects base ip blocks, the
	 * phase1 ip blocks need to be resumed. Otherwise there
	 * will be a BIOS signature error and the psp bootloader
	 * can't load kdb on the next amdgpu install.
	 */
	if (gpu_reset_for_dev_remove) {
		list_for_each_entry(tmp_adev, device_list_handle, reset_list)
			amdgpu_device_ip_resume_phase1(tmp_adev);

		goto end;
	}

	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		if (need_full_reset) {
			/* post card */
			r = amdgpu_device_asic_init(tmp_adev);
			if (r) {
				dev_warn(tmp_adev->dev, "asic atom init failed!");
			} else {
				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
				r = amdgpu_amdkfd_resume_iommu(tmp_adev);
				if (r)
					goto out;

				r = amdgpu_device_ip_resume_phase1(tmp_adev);
				if (r)
					goto out;

				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
#ifdef CONFIG_DEV_COREDUMP
				tmp_adev->reset_vram_lost = vram_lost;
				memset(&tmp_adev->reset_task_info, 0,
				       sizeof(tmp_adev->reset_task_info));
				if (reset_context->job && reset_context->job->vm)
					tmp_adev->reset_task_info =
						reset_context->job->vm->task_info;
				amdgpu_reset_capture_coredumpm(tmp_adev);
#endif
				if (vram_lost) {
					DRM_INFO("VRAM is lost due to GPU reset!\n");
					amdgpu_inc_vram_lost(tmp_adev);
				}

				r = amdgpu_device_fw_loading(tmp_adev);
				if (r)
					return r;

				r = amdgpu_device_ip_resume_phase2(tmp_adev);
				if (r)
					goto out;

				if (vram_lost)
					amdgpu_device_fill_reset_magic(tmp_adev);

				/*
				 * Add this ASIC as tracked, as the reset
				 * already completed successfully.
				 */
				amdgpu_register_gpu_instance(tmp_adev);

				if (!reset_context->hive &&
				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
					amdgpu_xgmi_add_device(tmp_adev);

				r = amdgpu_device_ip_late_init(tmp_adev);
				if (r)
					goto out;

				drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);

				/*
				 * The GPU enters a bad state once the number of
				 * faulty pages caught by ECC has reached the
				 * threshold, and RAS recovery would be scheduled
				 * next. So check here whether the bad page
				 * threshold has indeed been exceeded; if so,
				 * break recovery and remind the user to either
				 * retire this GPU or set a bigger
				 * bad_page_threshold the next time the driver
				 * is probed.
				 */
				if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
					/* must succeed. */
					amdgpu_ras_resume(tmp_adev);
				} else {
					r = -EINVAL;
					goto out;
				}

				/* Update PSP FW topology after reset */
				if (reset_context->hive &&
				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
					r = amdgpu_xgmi_update_topology(
						reset_context->hive, tmp_adev);
			}
		}

out:
		if (!r) {
			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
			r = amdgpu_ib_ring_tests(tmp_adev);
			if (r) {
				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
				need_full_reset = true;
				r = -EAGAIN;
				goto end;
			}
		}

		if (!r)
			r = amdgpu_device_recover_vram(tmp_adev);
		else
			tmp_adev->asic_reset_res = r;
	}

end:
	if (need_full_reset)
		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
	else
		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
	return r;
}

static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
{
	switch (amdgpu_asic_reset_method(adev)) {
	case AMD_RESET_METHOD_MODE1:
		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
		break;
	case AMD_RESET_METHOD_MODE2:
		adev->mp1_state = PP_MP1_STATE_RESET;
		break;
	default:
		adev->mp1_state = PP_MP1_STATE_NONE;
		break;
	}
}

static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
{
	amdgpu_vf_error_trans_all(adev);
	adev->mp1_state = PP_MP1_STATE_NONE;
}

static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
{
	struct pci_dev *p = NULL;

	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
			adev->pdev->bus->number, 1);
	if (p) {
		pm_runtime_enable(&(p->dev));
		pm_runtime_resume(&(p->dev));
	}

	pci_dev_put(p);
}

static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
{
	enum amd_reset_method reset_method;
	struct pci_dev *p = NULL;
	u64 expires;

	/*
	 * For now, only BACO and mode1 reset are confirmed
	 * to suffer from the audio issue if not properly suspended.
	 */
	reset_method = amdgpu_asic_reset_method(adev);
	if ((reset_method != AMD_RESET_METHOD_BACO) &&
	    (reset_method != AMD_RESET_METHOD_MODE1))
		return -EINVAL;

	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
			adev->pdev->bus->number, 1);
	if (!p)
		return -ENODEV;

	expires = pm_runtime_autosuspend_expiration(&(p->dev));
	if (!expires)
		/*
		 * If we cannot get the audio device autosuspend delay,
		 * a fixed 4s interval is used. The audio controller's
		 * default autosuspend delay is 3s, so 4s is guaranteed
		 * to cover it.
		 */
		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;

	while (!pm_runtime_status_suspended(&(p->dev))) {
		if (!pm_runtime_suspend(&(p->dev)))
			break;

		if (expires < ktime_get_mono_fast_ns()) {
			dev_warn(adev->dev, "failed to suspend display audio\n");
			pci_dev_put(p);
			/* TODO: abort the succeeding gpu reset? */
			return -ETIMEDOUT;
		}
	}

	pm_runtime_disable(&(p->dev));

	pci_dev_put(p);
	return 0;
}

static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

#if defined(CONFIG_DEBUG_FS)
	if (!amdgpu_sriov_vf(adev))
		cancel_work(&adev->reset_work);
#endif

	if (adev->kfd.dev)
		cancel_work(&adev->kfd.reset_work);

	if (amdgpu_sriov_vf(adev))
		cancel_work(&adev->virt.flr_work);

	if (con && adev->ras_enabled)
		cancel_work(&con->recovery_work);
}

/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu_device pointer
 * @job: which job triggered the hang
 * @reset_context: amdgpu reset context pointer
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do soft-reset or full-reset and reinitialize Asic
 * Returns 0 for success or an error on failure.
 */

int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job,
			      struct amdgpu_reset_context *reset_context)
{
	struct list_head device_list, *device_list_handle = NULL;
	bool job_signaled = false;
	struct amdgpu_hive_info *hive = NULL;
	struct amdgpu_device *tmp_adev = NULL;
	int i, r = 0;
	bool need_emergency_restart = false;
	bool audio_suspended = false;
	bool gpu_reset_for_dev_remove = false;

	gpu_reset_for_dev_remove =
		test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);

	/*
	 * Special case: RAS triggered and full reset isn't supported
	 */
	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);

	/*
	 * Flush RAM to disk so that after reboot
	 * the user can read the log and see why the system rebooted.
	 */
	if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
		DRM_WARN("Emergency reboot.");

		ksys_sync_helper();
		emergency_restart();
	}

	dev_info(adev->dev, "GPU %s begin!\n",
		need_emergency_restart ? "jobs stop" : "reset");

	if (!amdgpu_sriov_vf(adev))
		hive = amdgpu_get_xgmi_hive(adev);
	if (hive)
		mutex_lock(&hive->hive_lock);

	reset_context->job = job;
	reset_context->hive = hive;
	/*
	 * Build list of devices to reset.
	 * In case we are in XGMI hive mode, resort the device list
	 * to put adev in the 1st position.
	 */
	INIT_LIST_HEAD(&device_list);
	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
			list_add_tail(&tmp_adev->reset_list, &device_list);
			if (gpu_reset_for_dev_remove && adev->shutdown)
				tmp_adev->shutdown = true;
		}
		if (!list_is_first(&adev->reset_list, &device_list))
			list_rotate_to_front(&adev->reset_list, &device_list);
		device_list_handle = &device_list;
	} else {
		list_add_tail(&adev->reset_list, &device_list);
		device_list_handle = &device_list;
	}

	/* We need to lock reset domain only once both for XGMI and single device */
	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
				    reset_list);
	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);

	/* block all schedulers and reset given job's ring */
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {

		amdgpu_device_set_mp1_state(tmp_adev);

		/*
		 * Try to put the audio codec into suspend state
		 * before gpu reset started.
		 *
		 * The power domain of the graphics device is shared
		 * with the AZ power domain. Without this, we may
		 * change the audio hardware behind the audio
		 * driver's back, which triggers audio codec errors.
		 */
		if (!amdgpu_device_suspend_display_audio(tmp_adev))
			audio_suspended = true;

		amdgpu_ras_set_error_query_ready(tmp_adev, false);

		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);

		if (!amdgpu_sriov_vf(tmp_adev))
			amdgpu_amdkfd_pre_reset(tmp_adev);

		/*
		 * Mark these ASICs as untracked first,
		 * and add them back after reset completes.
		 */
		amdgpu_unregister_gpu_instance(tmp_adev);

		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);

		/* disable ras on ALL IPs */
		if (!need_emergency_restart &&
		      amdgpu_device_ip_need_full_reset(tmp_adev))
			amdgpu_ras_suspend(tmp_adev);

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			drm_sched_stop(&ring->sched, job ? &job->base : NULL);

			if (need_emergency_restart)
				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
		}
		atomic_inc(&tmp_adev->gpu_reset_counter);
	}

	if (need_emergency_restart)
		goto skip_sched_resume;

	/*
	 * Must check guilty signal here since after this point all old
	 * HW fences are force signaled.
	 *
	 * job->base holds a reference to parent fence
	 */
	if (job && dma_fence_is_signaled(&job->hw_fence)) {
		job_signaled = true;
		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
		goto skip_hw_reset;
	}

retry:	/* Rest of adevs pre asic reset from XGMI hive. */
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		if (gpu_reset_for_dev_remove) {
			/* Workaround for ASICs that need to disable SMC first */
			amdgpu_device_smu_fini_early(tmp_adev);
		}
		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
		/*TODO Should we stop ?*/
		if (r) {
			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
				  r, adev_to_drm(tmp_adev)->unique);
			tmp_adev->asic_reset_res = r;
		}

		/*
		 * Drop all pending non scheduler resets. Scheduler resets
		 * were already dropped during drm_sched_stop
		 */
		amdgpu_device_stop_pending_resets(tmp_adev);
	}

	/* Actual ASIC resets if needed.*/
	/* Host driver will handle XGMI hive reset for SRIOV */
	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_reset_sriov(adev, job ? false : true);
		if (r)
			adev->asic_reset_res = r;

		/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
		    adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
			amdgpu_ras_resume(adev);
	} else {
		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
		if (r == -EAGAIN)
			goto retry;

		if (!r && gpu_reset_for_dev_remove)
			goto recover_end;
	}

skip_hw_reset:

	/* Post ASIC reset for all devs .*/
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			drm_sched_start(&ring->sched, true);
		}

		if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
			amdgpu_mes_self_test(tmp_adev);

		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));

		if (tmp_adev->asic_reset_res)
			r = tmp_adev->asic_reset_res;

		tmp_adev->asic_reset_res = 0;

		if (r) {
			/* bad news, how to tell it to userspace ? */
			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
		} else {
			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
				DRM_WARN("smart shift update failed\n");
		}
	}

skip_sched_resume:
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		/* unlock kfd: SRIOV would do it separately */
		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
			amdgpu_amdkfd_post_reset(tmp_adev);

		/* kfd_post_reset will do nothing if kfd device is not initialized,
		 * so bring up kfd here if it was not initialized before
		 */
		if (!adev->kfd.init_complete)
			amdgpu_amdkfd_device_init(adev);

		if (audio_suspended)
			amdgpu_device_resume_display_audio(tmp_adev);

		amdgpu_device_unset_mp1_state(tmp_adev);

		amdgpu_ras_set_error_query_ready(tmp_adev, true);
	}

recover_end:
	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
				    reset_list);
	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);

	if (hive) {
		mutex_unlock(&hive->hive_lock);
		amdgpu_put_xgmi_hive(hive);
	}

	if (r)
		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);

	atomic_set(&adev->reset_domain->reset_res, r);
	return r;
}
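
/*
 * Illustrative sketch (an assumption about callers, not code from this
 * file): a caller such as a job timeout handler would typically drive
 * recovery along the same lines as the PCI slot reset path below, e.g.:
 *
 *	struct amdgpu_reset_context reset_context;
 *
 *	memset(&reset_context, 0, sizeof(reset_context));
 *	reset_context.method = AMD_RESET_METHOD_NONE;
 *	reset_context.reset_req_dev = adev;
 *	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 *
 *	r = amdgpu_device_gpu_recover(adev, job, &reset_context);
 */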

/**
 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
 * and lanes) of the slot the device is in. Handles APUs and
 * virtualized environments where PCIE config space may not be available.
 */
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
	struct pci_dev *pdev;
	enum pci_bus_speed speed_cap, platform_speed_cap;
	enum pcie_link_width platform_link_width;

	if (amdgpu_pcie_gen_cap)
		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;

	if (amdgpu_pcie_lane_cap)
		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;

	/* covers APUs as well */
	if (pci_is_root_bus(adev->pdev->bus)) {
		if (adev->pm.pcie_gen_mask == 0)
			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
		if (adev->pm.pcie_mlw_mask == 0)
			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
		return;
	}

	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
		return;

	pcie_bandwidth_available(adev->pdev, NULL,
				 &platform_speed_cap, &platform_link_width);

	if (adev->pm.pcie_gen_mask == 0) {
		/* asic caps */
		pdev = adev->pdev;
		speed_cap = pcie_get_speed_cap(pdev);
		if (speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
		} else {
			if (speed_cap == PCIE_SPEED_32_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
			else if (speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
		/* platform caps */
		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
		} else {
			if (platform_speed_cap == PCIE_SPEED_32_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;

		}
	}
	if (adev->pm.pcie_mlw_mask == 0) {
		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
		} else {
			switch (platform_link_width) {
			case PCIE_LNK_X32:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X16:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X12:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X8:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X4:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X2:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X1:
				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
				break;
			default:
				break;
			}
		}
	}
}
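
/*
 * Worked example (illustrative, not code from this file): a Gen4-capable
 * ASIC sitting in a Gen3 x8 slot reports speed_cap = PCIE_SPEED_16_0GT but
 * platform_speed_cap = PCIE_SPEED_8_0GT, so pcie_gen_mask ends up with the
 * ASIC GEN1-GEN4 bits plus the platform GEN1-GEN3 bits, and pcie_mlw_mask
 * allows widths x1 through x8. Consumers of these masks are then expected
 * to honor the platform limits when picking a link configuration.
 */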

/**
 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
 *
 * @adev: amdgpu_device pointer
 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
 *
 * Return true if @peer_adev can access (DMA) @adev through the PCIe
 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
 * @peer_adev.
 */
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
				      struct amdgpu_device *peer_adev)
{
#ifdef CONFIG_HSA_AMD_P2P
	uint64_t address_mask = peer_adev->dev->dma_mask ?
		~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
	resource_size_t aper_limit =
		adev->gmc.aper_base + adev->gmc.aper_size - 1;
	bool p2p_access =
		!adev->gmc.xgmi.connected_to_cpu &&
		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);

	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
		adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
		!(adev->gmc.aper_base & address_mask ||
		  aper_limit & address_mask));
#else
	return false;
#endif
}
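
/*
 * Worked example (illustrative): if the peer is limited to 40-bit DMA
 * (dma_mask = (1ULL << 40) - 1), address_mask keeps only the bits above
 * 1 TiB, so the check passes only when the whole visible VRAM BAR of
 * @adev (aper_base through aper_limit) sits below that boundary and the
 * BAR has been resized to cover all of VRAM ("large BAR").
 */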

int amdgpu_device_baco_enter(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	if (!amdgpu_device_supports_baco(dev))
		return -ENOTSUPP;

	if (ras && adev->ras_enabled &&
	    adev->nbio.funcs->enable_doorbell_interrupt)
		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);

	return amdgpu_dpm_baco_enter(adev);
}

int amdgpu_device_baco_exit(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
	int ret = 0;

	if (!amdgpu_device_supports_baco(dev))
		return -ENOTSUPP;

	ret = amdgpu_dpm_baco_exit(adev);
	if (ret)
		return ret;

	if (ras && adev->ras_enabled &&
	    adev->nbio.funcs->enable_doorbell_interrupt)
		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);

	if (amdgpu_passthrough(adev) &&
	    adev->nbio.funcs->clear_doorbell_interrupt)
		adev->nbio.funcs->clear_doorbell_interrupt(adev);

	return 0;
}

/**
 * amdgpu_pci_error_detected - Called when a PCI error is detected.
 * @pdev: PCI device struct
 * @state: PCI channel state
 *
 * Description: Called when a PCI error is detected.
 *
 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
 */
pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int i;

	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);

	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		DRM_WARN("No support for XGMI hive yet...");
		return PCI_ERS_RESULT_DISCONNECT;
	}

	adev->pci_channel_state = state;

	switch (state) {
	case pci_channel_io_normal:
		return PCI_ERS_RESULT_CAN_RECOVER;
	/* Fatal error, prepare for slot reset */
	case pci_channel_io_frozen:
		/*
		 * Locking adev->reset_domain->sem will prevent any external access
		 * to GPU during PCI error recovery
		 */
		amdgpu_device_lock_reset_domain(adev->reset_domain);
		amdgpu_device_set_mp1_state(adev);

		/*
		 * Block any work scheduling as we do for regular GPU reset
		 * for the duration of the recovery
		 */
		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			drm_sched_stop(&ring->sched, NULL);
		}
		atomic_inc(&adev->gpu_reset_counter);
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
		/* Permanent error, prepare for device removal */
		return PCI_ERS_RESULT_DISCONNECT;
	}

	return PCI_ERS_RESULT_NEED_RESET;
}

/**
 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
 * @pdev: pointer to PCI device
 */
pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
{
	DRM_INFO("PCI error: mmio enabled callback!!\n");

	/* TODO - dump whatever for debugging purposes */

	/* This is called only if amdgpu_pci_error_detected returns
	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
	 * works, no need to reset slot.
	 */

	return PCI_ERS_RESULT_RECOVERED;
}

/**
 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
 * @pdev: PCI device struct
 *
 * Description: This routine is called by the pci error recovery
 * code after the PCI slot has been reset, just before we
 * should resume normal operations.
 */
pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r, i;
	struct amdgpu_reset_context reset_context;
	u32 memsize;
	struct list_head device_list;

	DRM_INFO("PCI error: slot reset callback!!\n");

	memset(&reset_context, 0, sizeof(reset_context));

	INIT_LIST_HEAD(&device_list);
	list_add_tail(&adev->reset_list, &device_list);

	/* wait for asic to come out of reset */
	msleep(500);

	/* Restore PCI confspace */
	amdgpu_device_load_pci_state(pdev);

	/* confirm ASIC came out of reset */
	for (i = 0; i < adev->usec_timeout; i++) {
		memsize = amdgpu_asic_get_config_memsize(adev);

		if (memsize != 0xffffffff)
			break;
		udelay(1);
	}
	if (memsize == 0xffffffff) {
		r = -ETIME;
		goto out;
	}

	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);

	adev->no_hw_access = true;
	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
	adev->no_hw_access = false;
	if (r)
		goto out;

	r = amdgpu_do_asic_reset(&device_list, &reset_context);

out:
	if (!r) {
		if (amdgpu_device_cache_pci_state(adev->pdev))
			pci_restore_state(adev->pdev);

		DRM_INFO("PCIe error recovery succeeded\n");
	} else {
		DRM_ERROR("PCIe error recovery failed, err:%d", r);
		amdgpu_device_unset_mp1_state(adev);
		amdgpu_device_unlock_reset_domain(adev->reset_domain);
	}

	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}

/**
 * amdgpu_pci_resume() - resume normal ops after PCI reset
 * @pdev: pointer to PCI device
 *
 * Called when the error recovery driver tells us that it's
 * OK to resume normal operation.
 */
void amdgpu_pci_resume(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int i;

	DRM_INFO("PCI error: resume callback!!\n");

	/* Only continue execution for the case of pci_channel_io_frozen */
	if (adev->pci_channel_state != pci_channel_io_frozen)
		return;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		drm_sched_start(&ring->sched, true);
	}

	amdgpu_device_unset_mp1_state(adev);
	amdgpu_device_unlock_reset_domain(adev->reset_domain);
}

bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r;

	r = pci_save_state(pdev);
	if (!r) {
		kfree(adev->pci_state);

		adev->pci_state = pci_store_saved_state(pdev);

		if (!adev->pci_state) {
			DRM_ERROR("Failed to store PCI saved state");
			return false;
		}
	} else {
		DRM_WARN("Failed to save PCI state, err:%d\n", r);
		return false;
	}

	return true;
}

bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r;

	if (!adev->pci_state)
		return false;

	r = pci_load_saved_state(pdev, adev->pci_state);

	if (!r) {
		pci_restore_state(pdev);
	} else {
		DRM_WARN("Failed to load PCI state, err:%d\n", r);
		return false;
	}

	return true;
}

void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
		struct amdgpu_ring *ring)
{
#ifdef CONFIG_X86_64
	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
		return;
#endif
	if (adev->gmc.xgmi.connected_to_cpu)
		return;

	if (ring && ring->funcs->emit_hdp_flush)
		amdgpu_ring_emit_hdp_flush(ring);
	else
		amdgpu_asic_flush_hdp(adev, ring);
}

void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
		struct amdgpu_ring *ring)
{
#ifdef CONFIG_X86_64
	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
		return;
#endif
	if (adev->gmc.xgmi.connected_to_cpu)
		return;

	amdgpu_asic_invalidate_hdp(adev, ring);
}

int amdgpu_in_reset(struct amdgpu_device *adev)
{
	return atomic_read(&adev->reset_domain->in_gpu_reset);
}

/**
 * amdgpu_device_halt() - bring hardware to some kind of halt state
 *
 * @adev: amdgpu_device pointer
 *
 * Bring hardware to some kind of halt state so that no one can touch it
 * any more. It will help to maintain error context when an error occurs.
 * Compared to a simple hang, the system will keep stable at least for SSH
 * access. Then it should be trivial to inspect the hardware state and
 * see what's going on. Implemented as following:
 *
 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
 * clears all CPU mappings to device, disallows remappings through page faults
 * 2. amdgpu_irq_disable_all() disables all interrupts
 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 * 4. set adev->no_hw_access to avoid potential crashes after step 5
 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
 * flush any in flight DMA operations
 */
void amdgpu_device_halt(struct amdgpu_device *adev)
{
	struct pci_dev *pdev = adev->pdev;
	struct drm_device *ddev = adev_to_drm(adev);

	drm_dev_unplug(ddev);

	amdgpu_irq_disable_all(adev);

	amdgpu_fence_driver_hw_fini(adev);

	adev->no_hw_access = true;

	amdgpu_device_unmap_mmio(adev);

	pci_disable_device(pdev);
	pci_wait_for_pending_transaction(pdev);
}

u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
				u32 reg)
{
	unsigned long flags, address, data;
	u32 r;

	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, reg * 4);
	(void)RREG32(address);
	r = RREG32(data);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
	return r;
}

void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
				u32 reg, u32 v)
{
	unsigned long flags, address, data;

	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, reg * 4);
	(void)RREG32(address);
	WREG32(data, v);
	(void)RREG32(data);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
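
/*
 * Illustrative read-modify-write through the index/data pair above, with a
 * hypothetical register offset `off` and bit `SOME_ENABLE_BIT` (neither is
 * defined in this file):
 *
 *	u32 v = amdgpu_device_pcie_port_rreg(adev, off);
 *	v |= SOME_ENABLE_BIT;
 *	amdgpu_device_pcie_port_wreg(adev, off, v);
 *
 * The spinlock in each helper makes the individual accesses safe, but a
 * caller that needs the whole read-modify-write to be atomic still has to
 * provide its own serialization.
 */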

/**
 * amdgpu_device_switch_gang - switch to a new gang
 * @adev: amdgpu_device pointer
 * @gang: the gang to switch to
 *
 * Try to switch to a new gang.
 * Returns: NULL if we switched to the new gang or a reference to the current
 * gang leader.
 */
struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
					    struct dma_fence *gang)
{
	struct dma_fence *old = NULL;

	do {
		dma_fence_put(old);
		rcu_read_lock();
		old = dma_fence_get_rcu_safe(&adev->gang_submit);
		rcu_read_unlock();

		if (old == gang)
			break;

		if (!dma_fence_is_signaled(old))
			return old;

	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
			 old, gang) != old);

	dma_fence_put(old);
	return NULL;
}
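
/*
 * Illustrative usage (an assumption about the caller, not code from this
 * file): a submission path would install its gang fence and wait out any
 * still-running previous gang leader before retrying the switch:
 *
 *	fence = amdgpu_device_switch_gang(adev, gang);
 *	if (fence) {
 *		dma_fence_wait(fence, false);
 *		dma_fence_put(fence);
 *		// ...then call amdgpu_device_switch_gang() again
 *	}
 *
 * The cmpxchg() loop above makes the swap lock-free: it only replaces
 * adev->gang_submit if no other thread changed it since the RCU read.
 */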

bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_HAINAN:
#endif
	case CHIP_TOPAZ:
		/* chips with no display hardware */
		return false;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_OLAND:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		/* chips with display hardware */
		return true;
	default:
		/* IP discovery */
		if (!adev->ip_versions[DCE_HWIP][0] ||
		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
			return false;
		return true;
	}
}

uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
				uint32_t inst, uint32_t reg_addr, char reg_name[],
				uint32_t expected_value, uint32_t mask)
{
	uint32_t ret = 0;
	uint32_t old_ = 0;
	uint32_t tmp_ = RREG32(reg_addr);
	uint32_t loop = adev->usec_timeout;

	while ((tmp_ & (mask)) != (expected_value)) {
		if (old_ != tmp_) {
			loop = adev->usec_timeout;
			old_ = tmp_;
		} else
			udelay(1);
		tmp_ = RREG32(reg_addr);
		loop--;
		if (!loop) {
			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
				  inst, reg_name, (uint32_t)expected_value,
				  (uint32_t)(tmp_ & (mask)));
			ret = -ETIMEDOUT;
			break;
		}
	}
	return ret;
}
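
/*
 * Illustrative usage (register and mask names are hypothetical): poll a
 * status register until its idle bit is set, where the loop above gives up
 * after adev->usec_timeout polls without the value changing:
 *
 *	r = amdgpu_device_wait_on_rreg(adev, 0, mmFOO_STATUS, "FOO_STATUS",
 *				       FOO_STATUS__IDLE_MASK,
 *				       FOO_STATUS__IDLE_MASK);
 *	if (r == -ETIMEDOUT)
 *		dev_err(adev->dev, "FOO never went idle\n");
 */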