drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/devcoredump.h>
#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>
#include <linux/apple-gmux.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>

#if IS_ENABLED(CONFIG_X86)
#include <asm/intel-family.h>
#endif

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)

static const struct drm_driver amdgpu_kms_driver;

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as the sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return sysfs_emit(buf, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

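/*
 * Illustrative user-space sketch (not part of this file): the attribute
 * defined above can be read through sysfs. The card index in the path is an
 * assumption and differs from system to system; includes and error handling
 * are omitted.
 *
 *	int fd = open("/sys/class/drm/card0/device/pcie_replay_count", O_RDONLY);
 *	char buf[32] = {};
 *
 *	if (fd >= 0) {
 *		if (read(fd, buf, sizeof(buf) - 1) > 0)
 *			printf("pcie replay count: %s", buf);
 *		close(fd);
 *	}
 */
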
5494d864
AD
165static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
166
bd607166
KR
167/**
168 * DOC: product_name
169 *
170 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
2c496a6c 172 * The file product_name is used for this and returns the product name
bd607166
KR
173 * as returned from the FRU.
174 * NOTE: This is only available for certain server cards
175 */
176
177static ssize_t amdgpu_device_get_product_name(struct device *dev,
178 struct device_attribute *attr, char *buf)
179{
180 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 181 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 182
36000c7a 183 return sysfs_emit(buf, "%s\n", adev->product_name);
bd607166
KR
184}
185
186static DEVICE_ATTR(product_name, S_IRUGO,
187 amdgpu_device_get_product_name, NULL);
188
189/**
190 * DOC: product_number
191 *
192 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
2c496a6c 194 * The file product_number is used for this and returns the part number
bd607166
KR
195 * as returned from the FRU.
196 * NOTE: This is only available for certain server cards
197 */
198
199static ssize_t amdgpu_device_get_product_number(struct device *dev,
200 struct device_attribute *attr, char *buf)
201{
202 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 203 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 204
36000c7a 205 return sysfs_emit(buf, "%s\n", adev->product_number);
bd607166
KR
206}
207
208static DEVICE_ATTR(product_number, S_IRUGO,
209 amdgpu_device_get_product_number, NULL);
210
211/**
212 * DOC: serial_number
213 *
214 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
216 * The file serial_number is used for this and returns the serial number
217 * as returned from the FRU.
218 * NOTE: This is only available for certain server cards
219 */
220
221static ssize_t amdgpu_device_get_serial_number(struct device *dev,
222 struct device_attribute *attr, char *buf)
223{
224 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 225 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 226
36000c7a 227 return sysfs_emit(buf, "%s\n", adev->serial);
bd607166
KR
228}
229
230static DEVICE_ATTR(serial_number, S_IRUGO,
231 amdgpu_device_get_serial_number, NULL);
232
fd496ca8 233/**
b98c6299 234 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
235 *
236 * @dev: drm_device pointer
237 *
b98c6299 238 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
239 * otherwise return false.
240 */
b98c6299 241bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
242{
243 struct amdgpu_device *adev = drm_to_adev(dev);
244
b98c6299 245 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
246 return true;
247 return false;
248}
249
e3ecdffa 250/**
0330b848 251 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
252 *
253 * @dev: drm_device pointer
254 *
b98c6299 255 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
256 * otherwise return false.
257 */
31af062a 258bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 259{
1348969a 260 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 261
b98c6299
AD
262 if (adev->has_pr3 ||
263 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
264 return true;
265 return false;
266}
267
a69cba42
AD
268/**
269 * amdgpu_device_supports_baco - Does the device support BACO
270 *
271 * @dev: drm_device pointer
272 *
 * Returns true if the device supports BACO,
274 * otherwise return false.
275 */
276bool amdgpu_device_supports_baco(struct drm_device *dev)
277{
1348969a 278 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
279
280 return amdgpu_asic_supports_baco(adev);
281}
282
3fa8f89d
S
283/**
284 * amdgpu_device_supports_smart_shift - Is the device dGPU with
285 * smart shift support
286 *
287 * @dev: drm_device pointer
288 *
289 * Returns true if the device is a dGPU with Smart Shift support,
290 * otherwise returns false.
291 */
292bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
293{
294 return (amdgpu_device_supports_boco(dev) &&
295 amdgpu_acpi_is_power_shift_control_supported());
296}
297
6e3cd2a9
MCC
298/*
299 * VRAM access helper functions
300 */
301
e35e2b11 302/**
048af66b 303 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
304 *
305 * @adev: amdgpu_device pointer
306 * @pos: offset of the buffer in vram
307 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer pointed to by @buf must be at least @size bytes
309 * @write: true - write to vram, otherwise - read from vram
310 */
048af66b
KW
311void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
312 void *buf, size_t size, bool write)
e35e2b11 313{
e35e2b11 314 unsigned long flags;
048af66b
KW
315 uint32_t hi = ~0, tmp = 0;
316 uint32_t *data = buf;
ce05ac56 317 uint64_t last;
f89f8c6b 318 int idx;
ce05ac56 319
c58a863b 320 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 321 return;
9d11eb0d 322
048af66b
KW
323 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
324
325 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
326 for (last = pos + size; pos < last; pos += 4) {
327 tmp = pos >> 31;
328
329 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
330 if (tmp != hi) {
331 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
332 hi = tmp;
333 }
334 if (write)
335 WREG32_NO_KIQ(mmMM_DATA, *data++);
336 else
337 *data++ = RREG32_NO_KIQ(mmMM_DATA);
338 }
339
340 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
341 drm_dev_exit(idx);
342}
343
344/**
 * amdgpu_device_aper_access - access vram by the vram aperture
048af66b
KW
346 *
347 * @adev: amdgpu_device pointer
348 * @pos: offset of the buffer in vram
349 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer pointed to by @buf must be at least @size bytes
351 * @write: true - write to vram, otherwise - read from vram
352 *
353 * The return value means how many bytes have been transferred.
354 */
355size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
356 void *buf, size_t size, bool write)
357{
9d11eb0d 358#ifdef CONFIG_64BIT
048af66b
KW
359 void __iomem *addr;
360 size_t count = 0;
361 uint64_t last;
362
363 if (!adev->mman.aper_base_kaddr)
364 return 0;
365
9d11eb0d
CK
366 last = min(pos + size, adev->gmc.visible_vram_size);
367 if (last > pos) {
048af66b
KW
368 addr = adev->mman.aper_base_kaddr + pos;
369 count = last - pos;
9d11eb0d
CK
370
371 if (write) {
372 memcpy_toio(addr, buf, count);
373 mb();
810085dd 374 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 375 } else {
810085dd 376 amdgpu_device_invalidate_hdp(adev, NULL);
9d11eb0d
CK
377 mb();
378 memcpy_fromio(buf, addr, count);
379 }
380
9d11eb0d 381 }
048af66b
KW
382
383 return count;
384#else
385 return 0;
9d11eb0d 386#endif
048af66b 387}
9d11eb0d 388
048af66b
KW
389/**
390 * amdgpu_device_vram_access - read/write a buffer in vram
391 *
392 * @adev: amdgpu_device pointer
393 * @pos: offset of the buffer in vram
394 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer pointed to by @buf must be at least @size bytes
396 * @write: true - write to vram, otherwise - read from vram
397 */
398void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
399 void *buf, size_t size, bool write)
400{
401 size_t count;
e35e2b11 402
048af66b
KW
	/* try using the vram aperture to access vram first */
404 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
405 size -= count;
406 if (size) {
		/* use MM to access the rest of vram */
408 pos += count;
409 buf += count;
410 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
411 }
412}
413
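/*
 * Illustrative sketch (not part of the driver): copying a few dwords out of
 * VRAM with the combined helper above. "adev" is assumed to be a fully
 * initialized amdgpu_device; the helper transparently falls back from the
 * CPU-visible aperture to MM_INDEX/MM_DATA for the non-visible part.
 *
 *	u32 data[4];
 *
 *	amdgpu_device_vram_access(adev, 0, data, sizeof(data), false);
 */
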
d38ceaf9 414/*
f7ee1874 415 * register access helper functions.
d38ceaf9 416 */
56b53c0b
DL
417
418/* Check if hw access should be skipped because of hotplug or device error */
419bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
420{
7afefb81 421 if (adev->no_hw_access)
56b53c0b
DL
422 return true;
423
424#ifdef CONFIG_LOCKDEP
425 /*
426 * This is a bit complicated to understand, so worth a comment. What we assert
427 * here is that the GPU reset is not running on another thread in parallel.
428 *
429 * For this we trylock the read side of the reset semaphore, if that succeeds
	 * we know that the reset is not running in parallel.
431 *
432 * If the trylock fails we assert that we are either already holding the read
433 * side of the lock or are the reset thread itself and hold the write side of
434 * the lock.
435 */
436 if (in_task()) {
d0fb18b5
AG
437 if (down_read_trylock(&adev->reset_domain->sem))
438 up_read(&adev->reset_domain->sem);
56b53c0b 439 else
d0fb18b5 440 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
441 }
442#endif
443 return false;
444}
445
e3ecdffa 446/**
f7ee1874 447 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
448 *
449 * @adev: amdgpu_device pointer
450 * @reg: dword aligned register offset
451 * @acc_flags: access flags which require special behavior
452 *
453 * Returns the 32 bit value from the offset specified.
454 */
f7ee1874
HZ
455uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
456 uint32_t reg, uint32_t acc_flags)
d38ceaf9 457{
f4b373f4
TSD
458 uint32_t ret;
459
56b53c0b 460 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
461 return 0;
462
f7ee1874
HZ
463 if ((reg * 4) < adev->rmmio_size) {
464 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
465 amdgpu_sriov_runtime(adev) &&
d0fb18b5 466 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 467 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 468 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
469 } else {
470 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
471 }
472 } else {
473 ret = adev->pcie_rreg(adev, reg * 4);
81202807 474 }
bc992ba5 475
f7ee1874 476 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 477
f4b373f4 478 return ret;
d38ceaf9
AD
479}
480
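/*
 * Illustrative sketch (not part of the driver): most callers go through the
 * RREG32/WREG32 style macros, which expand to the helpers above. "reg" is a
 * dword-aligned register offset:
 *
 *	u32 tmp = amdgpu_device_rreg(adev, reg, 0);
 *	u32 raw = amdgpu_device_rreg(adev, reg, AMDGPU_REGS_NO_KIQ);
 *
 * Passing AMDGPU_REGS_NO_KIQ skips the KIQ path that is otherwise used for
 * runtime register access under SR-IOV.
 */
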
/*
 * MMIO register read with bytes helper functions
 * @offset: bytes offset from MMIO start
 */

e3ecdffa
AD
487/**
488 * amdgpu_mm_rreg8 - read a memory mapped IO register
489 *
490 * @adev: amdgpu_device pointer
491 * @offset: byte aligned register offset
492 *
493 * Returns the 8 bit value from the offset specified.
494 */
7cbbc745
AG
495uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
496{
56b53c0b 497 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
498 return 0;
499
421a2a30
ML
500 if (offset < adev->rmmio_size)
501 return (readb(adev->rmmio + offset));
502 BUG();
503}
504
/*
 * MMIO register write with bytes helper functions
 * @offset: bytes offset from MMIO start
 * @value: the value to be written to the register
 */
e3ecdffa
AD
511/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
513 *
514 * @adev: amdgpu_device pointer
515 * @offset: byte aligned register offset
516 * @value: 8 bit value to write
517 *
518 * Writes the value specified to the offset specified.
519 */
7cbbc745
AG
520void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
521{
56b53c0b 522 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
523 return;
524
421a2a30
ML
525 if (offset < adev->rmmio_size)
526 writeb(value, adev->rmmio + offset);
527 else
528 BUG();
529}
530
e3ecdffa 531/**
f7ee1874 532 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
533 *
534 * @adev: amdgpu_device pointer
535 * @reg: dword aligned register offset
536 * @v: 32 bit value to write to the register
537 * @acc_flags: access flags which require special behavior
538 *
539 * Writes the value specified to the offset specified.
540 */
f7ee1874
HZ
541void amdgpu_device_wreg(struct amdgpu_device *adev,
542 uint32_t reg, uint32_t v,
543 uint32_t acc_flags)
d38ceaf9 544{
56b53c0b 545 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
546 return;
547
f7ee1874
HZ
548 if ((reg * 4) < adev->rmmio_size) {
549 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
550 amdgpu_sriov_runtime(adev) &&
d0fb18b5 551 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 552 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 553 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
554 } else {
555 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
556 }
557 } else {
558 adev->pcie_wreg(adev, reg * 4, v);
81202807 559 }
bc992ba5 560
f7ee1874 561 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 562}
d38ceaf9 563
03f2abb0 564/**
4cc9f86f 565 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 566 *
71579346
RB
567 * @adev: amdgpu_device pointer
568 * @reg: mmio/rlc register
569 * @v: value to write
570 *
571 * this function is invoked only for the debugfs register access
03f2abb0 572 */
f7ee1874
HZ
573void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
574 uint32_t reg, uint32_t v)
2e0cc4d4 575{
56b53c0b 576 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
577 return;
578
2e0cc4d4 579 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
580 adev->gfx.rlc.funcs &&
581 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 582 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
1b2dc99e 583 return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
4cc9f86f
TSD
584 } else if ((reg * 4) >= adev->rmmio_size) {
585 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
586 } else {
587 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 588 }
d38ceaf9
AD
589}
590
d38ceaf9
AD
591/**
592 * amdgpu_mm_rdoorbell - read a doorbell dword
593 *
594 * @adev: amdgpu_device pointer
595 * @index: doorbell index
596 *
597 * Returns the value in the doorbell aperture at the
598 * requested doorbell index (CIK).
599 */
600u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
601{
56b53c0b 602 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
603 return 0;
604
0512e9ff 605 if (index < adev->doorbell.num_kernel_doorbells) {
d38ceaf9
AD
606 return readl(adev->doorbell.ptr + index);
607 } else {
608 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
609 return 0;
610 }
611}
612
613/**
614 * amdgpu_mm_wdoorbell - write a doorbell dword
615 *
616 * @adev: amdgpu_device pointer
617 * @index: doorbell index
618 * @v: value to write
619 *
620 * Writes @v to the doorbell aperture at the
621 * requested doorbell index (CIK).
622 */
623void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
624{
56b53c0b 625 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
626 return;
627
0512e9ff 628 if (index < adev->doorbell.num_kernel_doorbells) {
d38ceaf9
AD
629 writel(v, adev->doorbell.ptr + index);
630 } else {
631 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
632 }
633}
634
832be404
KW
635/**
636 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
637 *
638 * @adev: amdgpu_device pointer
639 * @index: doorbell index
640 *
641 * Returns the value in the doorbell aperture at the
642 * requested doorbell index (VEGA10+).
643 */
644u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
645{
56b53c0b 646 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
647 return 0;
648
0512e9ff 649 if (index < adev->doorbell.num_kernel_doorbells) {
832be404
KW
650 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
651 } else {
652 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
653 return 0;
654 }
655}
656
657/**
658 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
659 *
660 * @adev: amdgpu_device pointer
661 * @index: doorbell index
662 * @v: value to write
663 *
664 * Writes @v to the doorbell aperture at the
665 * requested doorbell index (VEGA10+).
666 */
667void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
668{
56b53c0b 669 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
670 return;
671
0512e9ff 672 if (index < adev->doorbell.num_kernel_doorbells) {
832be404
KW
673 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
674 } else {
675 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
676 }
677}
678
1bba3683
HZ
679/**
680 * amdgpu_device_indirect_rreg - read an indirect register
681 *
682 * @adev: amdgpu_device pointer
22f453fb 683 * @reg_addr: indirect register address to read from
1bba3683
HZ
684 *
685 * Returns the value of indirect register @reg_addr
686 */
687u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
688 u32 reg_addr)
689{
65ba96e9 690 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
691 void __iomem *pcie_index_offset;
692 void __iomem *pcie_data_offset;
65ba96e9
HZ
693 u32 r;
694
695 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
696 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
697
698 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
699 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
700 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
701
702 writel(reg_addr, pcie_index_offset);
703 readl(pcie_index_offset);
704 r = readl(pcie_data_offset);
705 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
706
707 return r;
708}
709
0c552ed3
LM
710u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
711 u64 reg_addr)
712{
713 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
714 u32 r;
715 void __iomem *pcie_index_offset;
716 void __iomem *pcie_index_hi_offset;
717 void __iomem *pcie_data_offset;
718
719 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
720 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
721 if (adev->nbio.funcs->get_pcie_index_hi_offset)
722 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
723 else
724 pcie_index_hi = 0;
725
726 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
727 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
728 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
729 if (pcie_index_hi != 0)
730 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
731 pcie_index_hi * 4;
732
733 writel(reg_addr, pcie_index_offset);
734 readl(pcie_index_offset);
735 if (pcie_index_hi != 0) {
736 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
737 readl(pcie_index_hi_offset);
738 }
739 r = readl(pcie_data_offset);
740
741 /* clear the high bits */
742 if (pcie_index_hi != 0) {
743 writel(0, pcie_index_hi_offset);
744 readl(pcie_index_hi_offset);
745 }
746
747 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
748
749 return r;
750}
751
1bba3683
HZ
752/**
753 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
754 *
755 * @adev: amdgpu_device pointer
22f453fb 756 * @reg_addr: indirect register address to read from
1bba3683
HZ
757 *
758 * Returns the value of indirect register @reg_addr
759 */
760u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
761 u32 reg_addr)
762{
65ba96e9 763 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
764 void __iomem *pcie_index_offset;
765 void __iomem *pcie_data_offset;
65ba96e9
HZ
766 u64 r;
767
768 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
769 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
770
771 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
772 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
773 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
774
775 /* read low 32 bits */
776 writel(reg_addr, pcie_index_offset);
777 readl(pcie_index_offset);
778 r = readl(pcie_data_offset);
779 /* read high 32 bits */
780 writel(reg_addr + 4, pcie_index_offset);
781 readl(pcie_index_offset);
782 r |= ((u64)readl(pcie_data_offset) << 32);
783 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
784
785 return r;
786}
787
788/**
789 * amdgpu_device_indirect_wreg - write an indirect register address
790 *
791 * @adev: amdgpu_device pointer
1bba3683
HZ
792 * @reg_addr: indirect register offset
793 * @reg_data: indirect register data
794 *
795 */
796void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
797 u32 reg_addr, u32 reg_data)
798{
65ba96e9 799 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
800 void __iomem *pcie_index_offset;
801 void __iomem *pcie_data_offset;
802
65ba96e9
HZ
803 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
804 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
805
1bba3683
HZ
806 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
807 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
808 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
809
810 writel(reg_addr, pcie_index_offset);
811 readl(pcie_index_offset);
812 writel(reg_data, pcie_data_offset);
813 readl(pcie_data_offset);
814 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
815}
816
0c552ed3
LM
817void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
818 u64 reg_addr, u32 reg_data)
819{
820 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
821 void __iomem *pcie_index_offset;
822 void __iomem *pcie_index_hi_offset;
823 void __iomem *pcie_data_offset;
824
825 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
826 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
827 if (adev->nbio.funcs->get_pcie_index_hi_offset)
828 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
829 else
830 pcie_index_hi = 0;
831
832 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
833 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
834 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
835 if (pcie_index_hi != 0)
836 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
837 pcie_index_hi * 4;
838
839 writel(reg_addr, pcie_index_offset);
840 readl(pcie_index_offset);
841 if (pcie_index_hi != 0) {
842 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
843 readl(pcie_index_hi_offset);
844 }
845 writel(reg_data, pcie_data_offset);
846 readl(pcie_data_offset);
847
848 /* clear the high bits */
849 if (pcie_index_hi != 0) {
850 writel(0, pcie_index_hi_offset);
851 readl(pcie_index_hi_offset);
852 }
853
854 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
855}
856
1bba3683
HZ
857/**
858 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
859 *
860 * @adev: amdgpu_device pointer
1bba3683
HZ
861 * @reg_addr: indirect register offset
862 * @reg_data: indirect register data
863 *
864 */
865void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
866 u32 reg_addr, u64 reg_data)
867{
65ba96e9 868 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
869 void __iomem *pcie_index_offset;
870 void __iomem *pcie_data_offset;
871
65ba96e9
HZ
872 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
873 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
874
1bba3683
HZ
875 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
876 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
877 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
878
879 /* write low 32 bits */
880 writel(reg_addr, pcie_index_offset);
881 readl(pcie_index_offset);
882 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
883 readl(pcie_data_offset);
884 /* write high 32 bits */
885 writel(reg_addr + 4, pcie_index_offset);
886 readl(pcie_index_offset);
887 writel((u32)(reg_data >> 32), pcie_data_offset);
888 readl(pcie_data_offset);
889 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
890}
891
dabc114e
HZ
892/**
893 * amdgpu_device_get_rev_id - query device rev_id
894 *
895 * @adev: amdgpu_device pointer
896 *
897 * Return device rev_id
898 */
899u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
900{
901 return adev->nbio.funcs->get_rev_id(adev);
902}
903
d38ceaf9
AD
904/**
905 * amdgpu_invalid_rreg - dummy reg read function
906 *
982a820b 907 * @adev: amdgpu_device pointer
d38ceaf9
AD
908 * @reg: offset of register
909 *
910 * Dummy register read function. Used for register blocks
911 * that certain asics don't have (all asics).
912 * Returns the value in the register.
913 */
914static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
915{
916 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
917 BUG();
918 return 0;
919}
920
0c552ed3
LM
921static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
922{
923 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
924 BUG();
925 return 0;
926}
927
d38ceaf9
AD
928/**
929 * amdgpu_invalid_wreg - dummy reg write function
930 *
982a820b 931 * @adev: amdgpu_device pointer
d38ceaf9
AD
932 * @reg: offset of register
933 * @v: value to write to the register
934 *
 * Dummy register write function. Used for register blocks
936 * that certain asics don't have (all asics).
937 */
938static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
939{
940 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
941 reg, v);
942 BUG();
943}
944
0c552ed3
LM
945static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
946{
947 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
948 reg, v);
949 BUG();
950}
951
4fa1c6a6
TZ
952/**
953 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
954 *
982a820b 955 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
956 * @reg: offset of register
957 *
958 * Dummy register read function. Used for register blocks
959 * that certain asics don't have (all asics).
960 * Returns the value in the register.
961 */
962static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
963{
964 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
965 BUG();
966 return 0;
967}
968
969/**
970 * amdgpu_invalid_wreg64 - dummy reg write function
971 *
982a820b 972 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
973 * @reg: offset of register
974 * @v: value to write to the register
975 *
 * Dummy register write function. Used for register blocks
977 * that certain asics don't have (all asics).
978 */
979static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
980{
981 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
982 reg, v);
983 BUG();
984}
985
d38ceaf9
AD
986/**
987 * amdgpu_block_invalid_rreg - dummy reg read function
988 *
982a820b 989 * @adev: amdgpu_device pointer
d38ceaf9
AD
990 * @block: offset of instance
991 * @reg: offset of register
992 *
993 * Dummy register read function. Used for register blocks
994 * that certain asics don't have (all asics).
995 * Returns the value in the register.
996 */
997static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
998 uint32_t block, uint32_t reg)
999{
1000 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1001 reg, block);
1002 BUG();
1003 return 0;
1004}
1005
1006/**
1007 * amdgpu_block_invalid_wreg - dummy reg write function
1008 *
982a820b 1009 * @adev: amdgpu_device pointer
d38ceaf9
AD
1010 * @block: offset of instance
1011 * @reg: offset of register
1012 * @v: value to write to the register
1013 *
 * Dummy register write function. Used for register blocks
1015 * that certain asics don't have (all asics).
1016 */
1017static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1018 uint32_t block,
1019 uint32_t reg, uint32_t v)
1020{
1021 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1022 reg, block, v);
1023 BUG();
1024}
1025
4d2997ab
AD
1026/**
1027 * amdgpu_device_asic_init - Wrapper for atom asic_init
1028 *
982a820b 1029 * @adev: amdgpu_device pointer
4d2997ab
AD
1030 *
1031 * Does any asic specific work and then calls atom asic init.
1032 */
1033static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1034{
1035 amdgpu_asic_pre_asic_init(adev);
1036
5db392a0
LL
1037 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
1038 adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
85d1bcc6
HZ
1039 return amdgpu_atomfirmware_asic_init(adev, true);
1040 else
1041 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
4d2997ab
AD
1042}
1043
e3ecdffa 1044/**
7ccfd79f 1045 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 1046 *
982a820b 1047 * @adev: amdgpu_device pointer
e3ecdffa
AD
1048 *
1049 * Allocates a scratch page of VRAM for use by various things in the
1050 * driver.
1051 */
7ccfd79f 1052static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 1053{
7ccfd79f
CK
1054 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1055 AMDGPU_GEM_DOMAIN_VRAM |
1056 AMDGPU_GEM_DOMAIN_GTT,
1057 &adev->mem_scratch.robj,
1058 &adev->mem_scratch.gpu_addr,
1059 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
1060}
1061
e3ecdffa 1062/**
7ccfd79f 1063 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 1064 *
982a820b 1065 * @adev: amdgpu_device pointer
e3ecdffa
AD
1066 *
1067 * Frees the VRAM scratch page.
1068 */
7ccfd79f 1069static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 1070{
7ccfd79f 1071 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
1072}
1073
1074/**
9c3f2b54 1075 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
1076 *
1077 * @adev: amdgpu_device pointer
1078 * @registers: pointer to the register array
1079 * @array_size: size of the register array
1080 *
 * Programs an array of registers with AND and OR masks.
1082 * This is a helper for setting golden registers.
1083 */
9c3f2b54
AD
1084void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1085 const u32 *registers,
1086 const u32 array_size)
d38ceaf9
AD
1087{
1088 u32 tmp, reg, and_mask, or_mask;
1089 int i;
1090
1091 if (array_size % 3)
1092 return;
1093
47fc644f 1094 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1095 reg = registers[i + 0];
1096 and_mask = registers[i + 1];
1097 or_mask = registers[i + 2];
1098
1099 if (and_mask == 0xffffffff) {
1100 tmp = or_mask;
1101 } else {
1102 tmp = RREG32(reg);
1103 tmp &= ~and_mask;
e0d07657
HZ
1104 if (adev->family >= AMDGPU_FAMILY_AI)
1105 tmp |= (or_mask & and_mask);
1106 else
1107 tmp |= or_mask;
d38ceaf9
AD
1108 }
1109 WREG32(reg, tmp);
1110 }
1111}
1112
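/*
 * Illustrative sketch (register name and values are made up): the array is a
 * flat list of {offset, and_mask, or_mask} triplets, typically a table of
 * golden register settings, e.g.
 *
 *	static const u32 example_golden_settings[] = {
 *		mmEXAMPLE_REG, 0xffffff0f, 0x00000040,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *						ARRAY_SIZE(example_golden_settings));
 */
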
e3ecdffa
AD
1113/**
1114 * amdgpu_device_pci_config_reset - reset the GPU
1115 *
1116 * @adev: amdgpu_device pointer
1117 *
1118 * Resets the GPU using the pci config reset sequence.
1119 * Only applicable to asics prior to vega10.
1120 */
8111c387 1121void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1122{
1123 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1124}
1125
af484df8
AD
1126/**
1127 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1128 *
1129 * @adev: amdgpu_device pointer
1130 *
1131 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1132 */
1133int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1134{
1135 return pci_reset_function(adev->pdev);
1136}
1137
d38ceaf9
AD
1138/*
1139 * GPU doorbell aperture helpers function.
1140 */
1141/**
06ec9070 1142 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
1143 *
1144 * @adev: amdgpu_device pointer
1145 *
1146 * Init doorbell driver information (CIK)
1147 * Returns 0 on success, error on failure.
1148 */
06ec9070 1149static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 1150{
6585661d 1151
705e519e
CK
1152 /* No doorbell on SI hardware generation */
1153 if (adev->asic_type < CHIP_BONAIRE) {
1154 adev->doorbell.base = 0;
1155 adev->doorbell.size = 0;
0512e9ff 1156 adev->doorbell.num_kernel_doorbells = 0;
705e519e
CK
1157 adev->doorbell.ptr = NULL;
1158 return 0;
1159 }
1160
d6895ad3
CK
1161 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1162 return -EINVAL;
1163
22357775
AD
1164 amdgpu_asic_init_doorbell_index(adev);
1165
d38ceaf9
AD
1166 /* doorbell bar mapping */
1167 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1168 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1169
de33a329 1170 if (adev->enable_mes) {
0512e9ff 1171 adev->doorbell.num_kernel_doorbells =
de33a329
JX
1172 adev->doorbell.size / sizeof(u32);
1173 } else {
0512e9ff 1174 adev->doorbell.num_kernel_doorbells =
de33a329
JX
1175 min_t(u32, adev->doorbell.size / sizeof(u32),
1176 adev->doorbell_index.max_assignment+1);
0512e9ff 1177 if (adev->doorbell.num_kernel_doorbells == 0)
de33a329
JX
1178 return -EINVAL;
1179
		/* For Vega, reserve and map two pages on the doorbell BAR since the
		 * SDMA paging queue doorbell uses the second page. The
		 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
		 * doorbells are in the first page. So with the paging queue enabled,
		 * the max num_kernel_doorbells should grow by one page (0x400 dwords).
		 */
0ee20b86
LM
1186 if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(4, 0, 0) &&
1187 adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(4, 2, 0))
0512e9ff 1188 adev->doorbell.num_kernel_doorbells += 0x400;
de33a329 1189 }
ec3db8a6 1190
8972e5d2 1191 adev->doorbell.ptr = ioremap(adev->doorbell.base,
0512e9ff 1192 adev->doorbell.num_kernel_doorbells *
8972e5d2
CK
1193 sizeof(u32));
1194 if (adev->doorbell.ptr == NULL)
d38ceaf9 1195 return -ENOMEM;
d38ceaf9
AD
1196
1197 return 0;
1198}
1199
1200/**
06ec9070 1201 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
1202 *
1203 * @adev: amdgpu_device pointer
1204 *
1205 * Tear down doorbell driver information (CIK)
1206 */
06ec9070 1207static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1208{
1209 iounmap(adev->doorbell.ptr);
1210 adev->doorbell.ptr = NULL;
1211}
1212
22cb0164 1213
d38ceaf9
AD
1214
1215/*
06ec9070 1216 * amdgpu_device_wb_*()
455a7bc2 1217 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1218 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
1219 */
1220
1221/**
06ec9070 1222 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1223 *
1224 * @adev: amdgpu_device pointer
1225 *
1226 * Disables Writeback and frees the Writeback memory (all asics).
1227 * Used at driver shutdown.
1228 */
06ec9070 1229static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1230{
1231 if (adev->wb.wb_obj) {
a76ed485
AD
1232 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1233 &adev->wb.gpu_addr,
1234 (void **)&adev->wb.wb);
d38ceaf9
AD
1235 adev->wb.wb_obj = NULL;
1236 }
1237}
1238
1239/**
03f2abb0 1240 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1241 *
1242 * @adev: amdgpu_device pointer
1243 *
455a7bc2 1244 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1245 * Used at driver startup.
1246 * Returns 0 on success or an -error on failure.
1247 */
06ec9070 1248static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1249{
1250 int r;
1251
1252 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1253 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1254 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1255 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1256 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1257 (void **)&adev->wb.wb);
d38ceaf9
AD
1258 if (r) {
1259 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1260 return r;
1261 }
d38ceaf9
AD
1262
1263 adev->wb.num_wb = AMDGPU_MAX_WB;
1264 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1265
1266 /* clear wb memory */
73469585 1267 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1268 }
1269
1270 return 0;
1271}
1272
1273/**
131b4b36 1274 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1275 *
1276 * @adev: amdgpu_device pointer
1277 * @wb: wb index
1278 *
1279 * Allocate a wb slot for use by the driver (all asics).
1280 * Returns 0 on success or -EINVAL on failure.
1281 */
131b4b36 1282int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1283{
1284 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1285
97407b63 1286 if (offset < adev->wb.num_wb) {
7014285a 1287 __set_bit(offset, adev->wb.used);
63ae07ca 1288 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1289 return 0;
1290 } else {
1291 return -EINVAL;
1292 }
1293}
1294
d38ceaf9 1295/**
131b4b36 1296 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1297 *
1298 * @adev: amdgpu_device pointer
1299 * @wb: wb index
1300 *
1301 * Free a wb slot allocated for use by the driver (all asics)
1302 */
131b4b36 1303void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1304{
73469585 1305 wb >>= 3;
d38ceaf9 1306 if (wb < adev->wb.num_wb)
73469585 1307 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1308}
1309
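/*
 * Illustrative sketch (error handling trimmed): allocating a writeback slot,
 * reading what the GPU last wrote into it, and releasing it again. The value
 * returned by amdgpu_device_wb_get() is a dword offset into adev->wb.wb.
 *
 *	u32 wb;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		u32 val = adev->wb.wb[wb];
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */
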
d6895ad3
CK
1310/**
1311 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1312 *
1313 * @adev: amdgpu_device pointer
1314 *
1315 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1316 * to fail, but if any of the BARs is not accessible after the size we abort
1317 * driver loading by returning -ENODEV.
1318 */
1319int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1320{
453f617a 1321 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1322 struct pci_bus *root;
1323 struct resource *res;
1324 unsigned i;
d6895ad3
CK
1325 u16 cmd;
1326 int r;
1327
0c03b912 1328 /* Bypass for VF */
1329 if (amdgpu_sriov_vf(adev))
1330 return 0;
1331
b7221f2b
AD
1332 /* skip if the bios has already enabled large BAR */
1333 if (adev->gmc.real_vram_size &&
1334 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1335 return 0;
1336
31b8adab
CK
1337 /* Check if the root BUS has 64bit memory resources */
1338 root = adev->pdev->bus;
1339 while (root->parent)
1340 root = root->parent;
1341
1342 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1343 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1344 res->start > 0x100000000ull)
1345 break;
1346 }
1347
1348 /* Trying to resize is pointless without a root hub window above 4GB */
1349 if (!res)
1350 return 0;
1351
453f617a
ND
1352 /* Limit the BAR size to what is available */
1353 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1354 rbar_size);
1355
d6895ad3
CK
1356 /* Disable memory decoding while we change the BAR addresses and size */
1357 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1358 pci_write_config_word(adev->pdev, PCI_COMMAND,
1359 cmd & ~PCI_COMMAND_MEMORY);
1360
1361 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 1362 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
1363 if (adev->asic_type >= CHIP_BONAIRE)
1364 pci_release_resource(adev->pdev, 2);
1365
1366 pci_release_resource(adev->pdev, 0);
1367
1368 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1369 if (r == -ENOSPC)
1370 DRM_INFO("Not enough PCI address space for a large BAR.");
1371 else if (r && r != -ENOTSUPP)
1372 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1373
1374 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1375
1376 /* When the doorbell or fb BAR isn't available we have no chance of
1377 * using the device.
1378 */
06ec9070 1379 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
1380 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1381 return -ENODEV;
1382
1383 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1384
1385 return 0;
1386}
a05502e5 1387
9535a86a
SZ
1388static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1389{
1390 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU)) {
1391 return false;
1392 }
1393
1394 return true;
1395}
1396
d38ceaf9
AD
1397/*
1398 * GPU helpers function.
1399 */
1400/**
39c640c0 1401 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1402 *
1403 * @adev: amdgpu_device pointer
1404 *
c836fec5
JQ
1405 * Check if the asic has been initialized (all asics) at driver startup
1406 * or post is needed if hw reset is performed.
1407 * Returns true if need or false if not.
d38ceaf9 1408 */
39c640c0 1409bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1410{
1411 uint32_t reg;
1412
bec86378
ML
1413 if (amdgpu_sriov_vf(adev))
1414 return false;
1415
9535a86a
SZ
1416 if (!amdgpu_device_read_bios(adev))
1417 return false;
1418
bec86378 1419 if (amdgpu_passthrough(adev)) {
		/* for FIJI: In the whole-GPU pass-through virtualization case, after a VM
		 * reboot some old SMC firmware still needs the driver to do a vPost,
		 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
		 * this flaw, so we force vPost to be executed for SMC versions below 22.15.
		 */
1425 if (adev->asic_type == CHIP_FIJI) {
1426 int err;
1427 uint32_t fw_ver;
1428 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if an error occurred */
1430 if (err)
1431 return true;
1432
1433 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1434 if (fw_ver < 0x00160e00)
1435 return true;
bec86378 1436 }
bec86378 1437 }
91fe77eb 1438
e3c1b071 1439 /* Don't post if we need to reset whole hive on init */
1440 if (adev->gmc.xgmi.pending_reset)
1441 return false;
1442
91fe77eb 1443 if (adev->has_hw_reset) {
1444 adev->has_hw_reset = false;
1445 return true;
1446 }
1447
1448 /* bios scratch used on CIK+ */
1449 if (adev->asic_type >= CHIP_BONAIRE)
1450 return amdgpu_atombios_scratch_need_asic_init(adev);
1451
1452 /* check MEM_SIZE for older asics */
1453 reg = amdgpu_asic_get_config_memsize(adev);
1454
1455 if ((reg != 0) && (reg != 0xffffffff))
1456 return false;
1457
1458 return true;
bec86378
ML
1459}
1460
0ab5d711
ML
1461/**
1462 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1463 *
1464 * @adev: amdgpu_device pointer
1465 *
1466 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1467 * be set for this device.
1468 *
1469 * Returns true if it should be used or false if not.
1470 */
1471bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1472{
1473 switch (amdgpu_aspm) {
1474 case -1:
1475 break;
1476 case 0:
1477 return false;
1478 case 1:
1479 return true;
1480 default:
1481 return false;
1482 }
1483 return pcie_aspm_enabled(adev->pdev);
1484}
1485
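/*
 * Illustrative sketch: an ASIC/IP specific init path would typically gate its
 * ASPM programming on this helper (nbio_program_aspm is a hypothetical name):
 *
 *	if (amdgpu_device_should_use_aspm(adev))
 *		nbio_program_aspm(adev);
 */
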
3ad5dcfe
KHF
1486bool amdgpu_device_aspm_support_quirk(void)
1487{
1488#if IS_ENABLED(CONFIG_X86)
1489 struct cpuinfo_x86 *c = &cpu_data(0);
1490
1491 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1492#else
1493 return true;
1494#endif
1495}
1496
d38ceaf9
AD
1497/* if we get transitioned to only one device, take VGA back */
1498/**
06ec9070 1499 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1500 *
bf44e8ce 1501 * @pdev: PCI device pointer
d38ceaf9
AD
1502 * @state: enable/disable vga decode
1503 *
1504 * Enable/disable vga decode (all asics).
1505 * Returns VGA resource flags.
1506 */
bf44e8ce
CH
1507static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1508 bool state)
d38ceaf9 1509{
bf44e8ce 1510 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
d38ceaf9
AD
1511 amdgpu_asic_set_vga_state(adev, state);
1512 if (state)
1513 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1514 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1515 else
1516 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1517}
1518
e3ecdffa
AD
1519/**
1520 * amdgpu_device_check_block_size - validate the vm block size
1521 *
1522 * @adev: amdgpu_device pointer
1523 *
1524 * Validates the vm block size specified via module parameter.
1525 * The vm block size defines number of bits in page table versus page directory,
1526 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1527 * page table and the remaining bits are in the page directory.
1528 */
06ec9070 1529static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1530{
1531 /* defines number of bits in page table versus page directory,
1532 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1533 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1534 if (amdgpu_vm_block_size == -1)
1535 return;
a1adf8be 1536
bab4fee7 1537 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1538 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1539 amdgpu_vm_block_size);
97489129 1540 amdgpu_vm_block_size = -1;
a1adf8be 1541 }
a1adf8be
CZ
1542}
1543
e3ecdffa
AD
1544/**
1545 * amdgpu_device_check_vm_size - validate the vm size
1546 *
1547 * @adev: amdgpu_device pointer
1548 *
1549 * Validates the vm size in GB specified via module parameter.
1550 * The VM size is the size of the GPU virtual memory space in GB.
1551 */
06ec9070 1552static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1553{
64dab074
AD
1554 /* no need to check the default value */
1555 if (amdgpu_vm_size == -1)
1556 return;
1557
83ca145d
ZJ
1558 if (amdgpu_vm_size < 1) {
1559 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1560 amdgpu_vm_size);
f3368128 1561 amdgpu_vm_size = -1;
83ca145d 1562 }
83ca145d
ZJ
1563}
1564
7951e376
RZ
1565static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1566{
1567 struct sysinfo si;
a9d4fe2f 1568 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1569 uint64_t total_memory;
1570 uint64_t dram_size_seven_GB = 0x1B8000000;
1571 uint64_t dram_size_three_GB = 0xB8000000;
1572
1573 if (amdgpu_smu_memory_pool_size == 0)
1574 return;
1575
1576 if (!is_os_64) {
1577 DRM_WARN("Not 64-bit OS, feature not supported\n");
1578 goto def_value;
1579 }
1580 si_meminfo(&si);
1581 total_memory = (uint64_t)si.totalram * si.mem_unit;
1582
1583 if ((amdgpu_smu_memory_pool_size == 1) ||
1584 (amdgpu_smu_memory_pool_size == 2)) {
1585 if (total_memory < dram_size_three_GB)
1586 goto def_value1;
1587 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1588 (amdgpu_smu_memory_pool_size == 8)) {
1589 if (total_memory < dram_size_seven_GB)
1590 goto def_value1;
1591 } else {
1592 DRM_WARN("Smu memory pool size not supported\n");
1593 goto def_value;
1594 }
1595 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1596
1597 return;
1598
1599def_value1:
	DRM_WARN("Not enough system memory\n");
1601def_value:
1602 adev->pm.smu_prv_buffer_size = 0;
1603}
1604
9f6a7857
HR
1605static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1606{
1607 if (!(adev->flags & AMD_IS_APU) ||
1608 adev->asic_type < CHIP_RAVEN)
1609 return 0;
1610
1611 switch (adev->asic_type) {
1612 case CHIP_RAVEN:
1613 if (adev->pdev->device == 0x15dd)
1614 adev->apu_flags |= AMD_APU_IS_RAVEN;
1615 if (adev->pdev->device == 0x15d8)
1616 adev->apu_flags |= AMD_APU_IS_PICASSO;
1617 break;
1618 case CHIP_RENOIR:
1619 if ((adev->pdev->device == 0x1636) ||
1620 (adev->pdev->device == 0x164c))
1621 adev->apu_flags |= AMD_APU_IS_RENOIR;
1622 else
1623 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1624 break;
1625 case CHIP_VANGOGH:
1626 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1627 break;
1628 case CHIP_YELLOW_CARP:
1629 break;
d0f56dc2 1630 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1631 if ((adev->pdev->device == 0x13FE) ||
1632 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1633 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1634 break;
9f6a7857 1635 default:
4eaf21b7 1636 break;
9f6a7857
HR
1637 }
1638
1639 return 0;
1640}
1641
d38ceaf9 1642/**
06ec9070 1643 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1644 *
1645 * @adev: amdgpu_device pointer
1646 *
1647 * Validates certain module parameters and updates
1648 * the associated values used by the driver (all asics).
1649 */
912dfc84 1650static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1651{
5b011235
CZ
1652 if (amdgpu_sched_jobs < 4) {
1653 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1654 amdgpu_sched_jobs);
1655 amdgpu_sched_jobs = 4;
47fc644f 1656 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1657 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1658 amdgpu_sched_jobs);
1659 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1660 }
d38ceaf9 1661
83e74db6 1662 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1663 /* gart size must be greater or equal to 32M */
1664 dev_warn(adev->dev, "gart size (%d) too small\n",
1665 amdgpu_gart_size);
83e74db6 1666 amdgpu_gart_size = -1;
d38ceaf9
AD
1667 }
1668
36d38372 1669 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1670 /* gtt size must be greater or equal to 32M */
36d38372
CK
1671 dev_warn(adev->dev, "gtt size (%d) too small\n",
1672 amdgpu_gtt_size);
1673 amdgpu_gtt_size = -1;
d38ceaf9
AD
1674 }
1675
d07f14be
RH
1676 /* valid range is between 4 and 9 inclusive */
1677 if (amdgpu_vm_fragment_size != -1 &&
1678 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1679 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1680 amdgpu_vm_fragment_size = -1;
1681 }
1682
5d5bd5e3
KW
1683 if (amdgpu_sched_hw_submission < 2) {
1684 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1685 amdgpu_sched_hw_submission);
1686 amdgpu_sched_hw_submission = 2;
1687 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1688 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1689 amdgpu_sched_hw_submission);
1690 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1691 }
1692
2656fd23
AG
1693 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1694 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1695 amdgpu_reset_method = -1;
1696 }
1697
7951e376
RZ
1698 amdgpu_device_check_smu_prv_buffer_size(adev);
1699
06ec9070 1700 amdgpu_device_check_vm_size(adev);
d38ceaf9 1701
06ec9070 1702 amdgpu_device_check_block_size(adev);
6a7f76e7 1703
19aede77 1704 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1705
e3c00faa 1706 return 0;
d38ceaf9
AD
1707}
1708
1709/**
1710 * amdgpu_switcheroo_set_state - set switcheroo state
1711 *
1712 * @pdev: pci dev pointer
1694467b 1713 * @state: vga_switcheroo state
d38ceaf9 1714 *
12024b17 1715 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1716 * the asics before or after it is powered up using ACPI methods.
1717 */
8aba21b7
LT
1718static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1719 enum vga_switcheroo_state state)
d38ceaf9
AD
1720{
1721 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1722 int r;
d38ceaf9 1723
b98c6299 1724 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1725 return;
1726
1727 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1728 pr_info("switched on\n");
d38ceaf9
AD
1729 /* don't suspend or resume card normally */
1730 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1731
8f66090b
TZ
1732 pci_set_power_state(pdev, PCI_D0);
1733 amdgpu_device_load_pci_state(pdev);
1734 r = pci_enable_device(pdev);
de185019
AD
1735 if (r)
1736 DRM_WARN("pci_enable_device failed (%d)\n", r);
1737 amdgpu_device_resume(dev, true);
d38ceaf9 1738
d38ceaf9 1739 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1740 } else {
dd4fa6c1 1741 pr_info("switched off\n");
d38ceaf9 1742 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1743 amdgpu_device_suspend(dev, true);
8f66090b 1744 amdgpu_device_cache_pci_state(pdev);
de185019 1745 /* Shut down the device */
8f66090b
TZ
1746 pci_disable_device(pdev);
1747 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1748 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1749 }
1750}
1751
1752/**
1753 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1754 *
1755 * @pdev: pci dev pointer
1756 *
1757 * Callback for the switcheroo driver. Checks if the switcheroo
1758 * state can be changed.
1759 * Returns true if the state can be changed, false if not.
1760 */
1761static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1762{
1763 struct drm_device *dev = pci_get_drvdata(pdev);
1764
1765 /*
1766 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1767 * locking inversion with the driver load path. And the access here is
1768 * completely racy anyway. So don't bother with locking for now.
1769 */
7e13ad89 1770 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1771}
1772
1773static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1774 .set_gpu_state = amdgpu_switcheroo_set_state,
1775 .reprobe = NULL,
1776 .can_switch = amdgpu_switcheroo_can_switch,
1777};
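/*
 * Usage sketch: this ops table is registered with vga_switcheroo during
 * device initialization, roughly as below.  The exact call site and the
 * runtime power-control flag are illustrative assumptions:
 *
 *	vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops,
 *				       amdgpu_device_supports_px(dev));
 */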
1778
e3ecdffa
AD
1779/**
1780 * amdgpu_device_ip_set_clockgating_state - set the CG state
1781 *
87e3f136 1782 * @dev: amdgpu_device pointer
e3ecdffa
AD
1783 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1784 * @state: clockgating state (gate or ungate)
1785 *
1786 * Sets the requested clockgating state for all instances of
1787 * the hardware IP specified.
1788 * Returns the error code from the last instance.
1789 */
43fa561f 1790int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1791 enum amd_ip_block_type block_type,
1792 enum amd_clockgating_state state)
d38ceaf9 1793{
43fa561f 1794 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1795 int i, r = 0;
1796
1797 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1798 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1799 continue;
c722865a
RZ
1800 if (adev->ip_blocks[i].version->type != block_type)
1801 continue;
1802 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1803 continue;
1804 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1805 (void *)adev, state);
1806 if (r)
1807 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1808 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1809 }
1810 return r;
1811}
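/*
 * Illustrative sketch, not taken from this file: a hypothetical caller
 * gating GFX clocks for all GFX IP instances through the helper above.
 */
static void amdgpu_example_gate_gfx_clocks(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
						   AMD_CG_STATE_GATE);
	if (r)
		dev_warn(adev->dev, "failed to gate GFX clocks (%d)\n", r);
}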
1812
e3ecdffa
AD
1813/**
1814 * amdgpu_device_ip_set_powergating_state - set the PG state
1815 *
87e3f136 1816 * @dev: amdgpu_device pointer
e3ecdffa
AD
1817 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1818 * @state: powergating state (gate or ungate)
1819 *
1820 * Sets the requested powergating state for all instances of
1821 * the hardware IP specified.
1822 * Returns the error code from the last instance.
1823 */
43fa561f 1824int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1825 enum amd_ip_block_type block_type,
1826 enum amd_powergating_state state)
d38ceaf9 1827{
43fa561f 1828 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1829 int i, r = 0;
1830
1831 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1832 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1833 continue;
c722865a
RZ
1834 if (adev->ip_blocks[i].version->type != block_type)
1835 continue;
1836 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1837 continue;
1838 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1839 (void *)adev, state);
1840 if (r)
1841 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1842 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1843 }
1844 return r;
1845}
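/*
 * Illustrative counterpart for powergating (hypothetical caller, not part
 * of the driver): gate power for all VCN IP instances.
 */
static void amdgpu_example_gate_vcn_power(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
						   AMD_PG_STATE_GATE);
	if (r)
		dev_warn(adev->dev, "failed to gate VCN power (%d)\n", r);
}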
1846
e3ecdffa
AD
1847/**
1848 * amdgpu_device_ip_get_clockgating_state - get the CG state
1849 *
1850 * @adev: amdgpu_device pointer
1851 * @flags: clockgating feature flags
1852 *
1853 * Walks the list of IPs on the device and updates the clockgating
1854 * flags for each IP.
1855 * Updates @flags with the feature flags for each hardware IP where
1856 * clockgating is enabled.
1857 */
2990a1fc 1858void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1859 u64 *flags)
6cb2d4e4
HR
1860{
1861 int i;
1862
1863 for (i = 0; i < adev->num_ip_blocks; i++) {
1864 if (!adev->ip_blocks[i].status.valid)
1865 continue;
1866 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1867 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1868 }
1869}
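/*
 * Illustrative sketch (hypothetical caller): collect and log the enabled
 * clockgating feature flags from every IP block using the helper above.
 */
static void amdgpu_example_dump_cg_flags(struct amdgpu_device *adev)
{
	u64 cg_flags = 0;

	amdgpu_device_ip_get_clockgating_state(adev, &cg_flags);
	dev_info(adev->dev, "CG feature flags: 0x%llx\n", cg_flags);
}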
1870
e3ecdffa
AD
1871/**
1872 * amdgpu_device_ip_wait_for_idle - wait for idle
1873 *
1874 * @adev: amdgpu_device pointer
1875 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1876 *
1877 * Waits for the requested hardware IP to be idle.
1878 * Returns 0 for success or a negative error code on failure.
1879 */
2990a1fc
AD
1880int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1881 enum amd_ip_block_type block_type)
5dbbb60b
AD
1882{
1883 int i, r;
1884
1885 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1886 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1887 continue;
a1255107
AD
1888 if (adev->ip_blocks[i].version->type == block_type) {
1889 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1890 if (r)
1891 return r;
1892 break;
1893 }
1894 }
1895 return 0;
1896
1897}
1898
e3ecdffa
AD
1899/**
1900 * amdgpu_device_ip_is_idle - is the hardware IP idle
1901 *
1902 * @adev: amdgpu_device pointer
1903 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1904 *
1905 * Check if the hardware IP is idle or not.
1906 * Returns true if the IP is idle, false if not.
1907 */
2990a1fc
AD
1908bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1909 enum amd_ip_block_type block_type)
5dbbb60b
AD
1910{
1911 int i;
1912
1913 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1914 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1915 continue;
a1255107
AD
1916 if (adev->ip_blocks[i].version->type == block_type)
1917 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1918 }
1919 return true;
1920
1921}
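/*
 * Illustrative sketch (hypothetical caller): use the two idle helpers
 * above together, doing the cheap check first and only then blocking
 * until the GFX IP reports idle.
 */
static int amdgpu_example_quiesce_gfx(struct amdgpu_device *adev)
{
	if (amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GFX))
		return 0;

	return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
}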
1922
e3ecdffa
AD
1923/**
1924 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1925 *
1926 * @adev: amdgpu_device pointer
87e3f136 1927 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1928 *
1929 * Returns a pointer to the hardware IP block structure
1930 * if it exists for the asic, otherwise NULL.
1931 */
2990a1fc
AD
1932struct amdgpu_ip_block *
1933amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1934 enum amd_ip_block_type type)
d38ceaf9
AD
1935{
1936 int i;
1937
1938 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1939 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1940 return &adev->ip_blocks[i];
1941
1942 return NULL;
1943}
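/*
 * Illustrative lookup sketch (hypothetical caller): fetch the GMC IP block
 * with the helper above and report its version, guarding against a missing
 * block.
 */
static void amdgpu_example_log_gmc_version(struct amdgpu_device *adev)
{
	struct amdgpu_ip_block *ip =
		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);

	if (ip)
		dev_info(adev->dev, "%s v%u.%u\n", ip->version->funcs->name,
			 ip->version->major, ip->version->minor);
}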
1944
1945/**
2990a1fc 1946 * amdgpu_device_ip_block_version_cmp - compare an IP block's version
d38ceaf9
AD
1947 *
1948 * @adev: amdgpu_device pointer
5fc3aeeb 1949 * @type: enum amd_ip_block_type
d38ceaf9
AD
1950 * @major: major version
1951 * @minor: minor version
1952 *
1953 * Return 0 if the registered IP block version is equal to or greater than
1954 * the requested major/minor version; return 1 if it is smaller or the ip_block doesn't exist
1955 */
2990a1fc
AD
1956int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1957 enum amd_ip_block_type type,
1958 u32 major, u32 minor)
d38ceaf9 1959{
2990a1fc 1960 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1961
a1255107
AD
1962 if (ip_block && ((ip_block->version->major > major) ||
1963 ((ip_block->version->major == major) &&
1964 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1965 return 0;
1966
1967 return 1;
1968}
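/*
 * Illustrative sketch (hypothetical caller): branch on whether the GFX IP
 * block is at least version 8.0, using the comparison above.
 */
static bool amdgpu_example_gfx_is_v8_or_newer(struct amdgpu_device *adev)
{
	return amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
						  8, 0) == 0;
}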
1969
a1255107 1970/**
2990a1fc 1971 * amdgpu_device_ip_block_add - add an IP block to the device
a1255107
AD
1972 *
1973 * @adev: amdgpu_device pointer
1974 * @ip_block_version: pointer to the IP to add
1975 *
1976 * Adds the IP block driver information to the collection of IPs
1977 * on the asic.
1978 */
2990a1fc
AD
1979int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1980 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1981{
1982 if (!ip_block_version)
1983 return -EINVAL;
1984
7bd939d0
LG
1985 switch (ip_block_version->type) {
1986 case AMD_IP_BLOCK_TYPE_VCN:
1987 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1988 return 0;
1989 break;
1990 case AMD_IP_BLOCK_TYPE_JPEG:
1991 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1992 return 0;
1993 break;
1994 default:
1995 break;
1996 }
1997
e966a725 1998 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1999 ip_block_version->funcs->name);
2000
a1255107
AD
2001 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2002
2003 return 0;
2004}
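/*
 * Usage sketch: the per-ASIC *_set_ip_blocks() helpers call the function
 * above once per IP, in bring-up order, roughly as below.  The block name
 * is a hypothetical placeholder, not a real symbol:
 *
 *	r = amdgpu_device_ip_block_add(adev, &example_common_ip_block);
 *	if (r)
 *		return r;
 */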
2005
e3ecdffa
AD
2006/**
2007 * amdgpu_device_enable_virtual_display - enable virtual display feature
2008 *
2009 * @adev: amdgpu_device pointer
2010 *
2011 * Enables the virtual display feature if the user has enabled it via
2012 * the module parameter virtual_display. This feature provides a virtual
2013 * display hardware on headless boards or in virtualized environments.
2014 * This function parses and validates the configuration string specified by
2015 * the user and configures the virtual display configuration (number of
2016 * virtual connectors, crtcs, etc.) specified.
2017 */
483ef985 2018static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
2019{
2020 adev->enable_virtual_display = false;
2021
2022 if (amdgpu_virtual_display) {
8f66090b 2023 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 2024 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
2025
2026 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2027 pciaddstr_tmp = pciaddstr;
0f66356d
ED
2028 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2029 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
2030 if (!strcmp("all", pciaddname)
2031 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
2032 long num_crtc;
2033 int res = -1;
2034
9accf2fd 2035 adev->enable_virtual_display = true;
0f66356d
ED
2036
2037 if (pciaddname_tmp)
2038 res = kstrtol(pciaddname_tmp, 10,
2039 &num_crtc);
2040
2041 if (!res) {
2042 if (num_crtc < 1)
2043 num_crtc = 1;
2044 if (num_crtc > 6)
2045 num_crtc = 6;
2046 adev->mode_info.num_crtc = num_crtc;
2047 } else {
2048 adev->mode_info.num_crtc = 1;
2049 }
9accf2fd
ED
2050 break;
2051 }
2052 }
2053
0f66356d
ED
2054 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2055 amdgpu_virtual_display, pci_address_name,
2056 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
2057
2058 kfree(pciaddstr);
2059 }
2060}
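/*
 * Module parameter sketch: the string parsed above takes the form
 * "<pci-address>[,<num_crtc>][;<pci-address>...]" or simply "all".  The
 * PCI addresses below are hypothetical examples, not defaults:
 *
 *	amdgpu.virtual_display=0000:03:00.0,2;0000:04:00.0
 *	amdgpu.virtual_display=all
 */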
2061
25263da3
AD
2062void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2063{
2064 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2065 adev->mode_info.num_crtc = 1;
2066 adev->enable_virtual_display = true;
2067 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2068 adev->enable_virtual_display, adev->mode_info.num_crtc);
2069 }
2070}
2071
e3ecdffa
AD
2072/**
2073 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2074 *
2075 * @adev: amdgpu_device pointer
2076 *
2077 * Parses the asic configuration parameters specified in the gpu info
2078 * firmware and makes them available to the driver for use in configuring
2079 * the asic.
2080 * Returns 0 on success, -EINVAL on failure.
2081 */
e2a75f88
AD
2082static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2083{
e2a75f88 2084 const char *chip_name;
c0a43457 2085 char fw_name[40];
e2a75f88
AD
2086 int err;
2087 const struct gpu_info_firmware_header_v1_0 *hdr;
2088
ab4fe3e1
HR
2089 adev->firmware.gpu_info_fw = NULL;
2090
72de33f8 2091 if (adev->mman.discovery_bin) {
cc375d8c
TY
2092 /*
2093 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 2094 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
2095 * when DAL no longer needs it.
2096 */
2097 if (adev->asic_type != CHIP_NAVI12)
2098 return 0;
258620d0
AD
2099 }
2100
e2a75f88 2101 switch (adev->asic_type) {
e2a75f88
AD
2102 default:
2103 return 0;
2104 case CHIP_VEGA10:
2105 chip_name = "vega10";
2106 break;
3f76dced
AD
2107 case CHIP_VEGA12:
2108 chip_name = "vega12";
2109 break;
2d2e5e7e 2110 case CHIP_RAVEN:
54f78a76 2111 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2112 chip_name = "raven2";
54f78a76 2113 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2114 chip_name = "picasso";
54c4d17e
FX
2115 else
2116 chip_name = "raven";
2d2e5e7e 2117 break;
65e60f6e
LM
2118 case CHIP_ARCTURUS:
2119 chip_name = "arcturus";
2120 break;
42b325e5
XY
2121 case CHIP_NAVI12:
2122 chip_name = "navi12";
2123 break;
e2a75f88
AD
2124 }
2125
2126 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2127 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2128 if (err) {
2129 dev_err(adev->dev,
b31d3063 2130 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2131 fw_name);
2132 goto out;
2133 }
2134
ab4fe3e1 2135 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2136 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2137
2138 switch (hdr->version_major) {
2139 case 1:
2140 {
2141 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2142 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2143 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2144
cc375d8c
TY
2145 /*
2146 * Should be dropped when DAL no longer needs it.
2147 */
2148 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2149 goto parse_soc_bounding_box;
2150
b5ab16bf
AD
2151 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2152 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2153 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2154 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2155 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2156 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2157 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2158 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2159 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2160 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2161 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2162 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2163 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2164 adev->gfx.cu_info.max_waves_per_simd =
2165 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2166 adev->gfx.cu_info.max_scratch_slots_per_cu =
2167 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2168 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2169 if (hdr->version_minor >= 1) {
35c2e910
HZ
2170 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2171 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2172 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2173 adev->gfx.config.num_sc_per_sh =
2174 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2175 adev->gfx.config.num_packer_per_sc =
2176 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2177 }
ec51d3fa
XY
2178
2179parse_soc_bounding_box:
ec51d3fa
XY
2180 /*
2181 * soc bounding box info is not integrated into the discovery table,
258620d0 2182 * so we always need to parse it from the gpu info firmware when needed.
ec51d3fa 2183 */
48321c3d
HW
2184 if (hdr->version_minor == 2) {
2185 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2186 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2187 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2188 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2189 }
e2a75f88
AD
2190 break;
2191 }
2192 default:
2193 dev_err(adev->dev,
2194 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2195 err = -EINVAL;
2196 goto out;
2197 }
2198out:
e2a75f88
AD
2199 return err;
2200}
2201
e3ecdffa
AD
2202/**
2203 * amdgpu_device_ip_early_init - run early init for hardware IPs
2204 *
2205 * @adev: amdgpu_device pointer
2206 *
2207 * Early initialization pass for hardware IPs. The hardware IPs that make
2208 * up each asic are discovered and each IP's early_init callback is run. This
2209 * is the first stage in initializing the asic.
2210 * Returns 0 on success, negative error code on failure.
2211 */
06ec9070 2212static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2213{
901e2be2
AD
2214 struct drm_device *dev = adev_to_drm(adev);
2215 struct pci_dev *parent;
aaa36a97 2216 int i, r;
ced69502 2217 bool total;
d38ceaf9 2218
483ef985 2219 amdgpu_device_enable_virtual_display(adev);
a6be7570 2220
00a979f3 2221 if (amdgpu_sriov_vf(adev)) {
00a979f3 2222 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2223 if (r)
2224 return r;
00a979f3
WS
2225 }
2226
d38ceaf9 2227 switch (adev->asic_type) {
33f34802
KW
2228#ifdef CONFIG_DRM_AMDGPU_SI
2229 case CHIP_VERDE:
2230 case CHIP_TAHITI:
2231 case CHIP_PITCAIRN:
2232 case CHIP_OLAND:
2233 case CHIP_HAINAN:
295d0daf 2234 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2235 r = si_set_ip_blocks(adev);
2236 if (r)
2237 return r;
2238 break;
2239#endif
a2e73f56
AD
2240#ifdef CONFIG_DRM_AMDGPU_CIK
2241 case CHIP_BONAIRE:
2242 case CHIP_HAWAII:
2243 case CHIP_KAVERI:
2244 case CHIP_KABINI:
2245 case CHIP_MULLINS:
e1ad2d53 2246 if (adev->flags & AMD_IS_APU)
a2e73f56 2247 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2248 else
2249 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2250
2251 r = cik_set_ip_blocks(adev);
2252 if (r)
2253 return r;
2254 break;
2255#endif
da87c30b
AD
2256 case CHIP_TOPAZ:
2257 case CHIP_TONGA:
2258 case CHIP_FIJI:
2259 case CHIP_POLARIS10:
2260 case CHIP_POLARIS11:
2261 case CHIP_POLARIS12:
2262 case CHIP_VEGAM:
2263 case CHIP_CARRIZO:
2264 case CHIP_STONEY:
2265 if (adev->flags & AMD_IS_APU)
2266 adev->family = AMDGPU_FAMILY_CZ;
2267 else
2268 adev->family = AMDGPU_FAMILY_VI;
2269
2270 r = vi_set_ip_blocks(adev);
2271 if (r)
2272 return r;
2273 break;
d38ceaf9 2274 default:
63352b7f
AD
2275 r = amdgpu_discovery_set_ip_blocks(adev);
2276 if (r)
2277 return r;
2278 break;
d38ceaf9
AD
2279 }
2280
901e2be2
AD
2281 if (amdgpu_has_atpx() &&
2282 (amdgpu_is_atpx_hybrid() ||
2283 amdgpu_has_atpx_dgpu_power_cntl()) &&
2284 ((adev->flags & AMD_IS_APU) == 0) &&
2285 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2286 adev->flags |= AMD_IS_PX;
2287
85ac2021
AD
2288 if (!(adev->flags & AMD_IS_APU)) {
2289 parent = pci_upstream_bridge(adev->pdev);
2290 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2291 }
901e2be2 2292
1884734a 2293
3b94fb10 2294 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2295 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2296 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2297 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2298 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2299
ced69502 2300 total = true;
d38ceaf9
AD
2301 for (i = 0; i < adev->num_ip_blocks; i++) {
2302 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2303 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2304 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2305 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2306 } else {
a1255107
AD
2307 if (adev->ip_blocks[i].version->funcs->early_init) {
2308 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2309 if (r == -ENOENT) {
a1255107 2310 adev->ip_blocks[i].status.valid = false;
2c1a2784 2311 } else if (r) {
a1255107
AD
2312 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2313 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2314 total = false;
2c1a2784 2315 } else {
a1255107 2316 adev->ip_blocks[i].status.valid = true;
2c1a2784 2317 }
974e6b64 2318 } else {
a1255107 2319 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2320 }
d38ceaf9 2321 }
21a249ca
AD
2322 /* get the vbios after the asic_funcs are set up */
2323 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2324 r = amdgpu_device_parse_gpu_info_fw(adev);
2325 if (r)
2326 return r;
2327
21a249ca 2328 /* Read BIOS */
9535a86a
SZ
2329 if (amdgpu_device_read_bios(adev)) {
2330 if (!amdgpu_get_bios(adev))
2331 return -EINVAL;
21a249ca 2332
9535a86a
SZ
2333 r = amdgpu_atombios_init(adev);
2334 if (r) {
2335 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2336 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2337 return r;
2338 }
21a249ca 2339 }
77eabc6f
PJZ
2340
2341 /* get pf2vf msg info at its earliest time */
2342 if (amdgpu_sriov_vf(adev))
2343 amdgpu_virt_init_data_exchange(adev);
2344
21a249ca 2345 }
d38ceaf9 2346 }
ced69502
ML
2347 if (!total)
2348 return -ENODEV;
d38ceaf9 2349
00fa4035 2350 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2351 adev->cg_flags &= amdgpu_cg_mask;
2352 adev->pg_flags &= amdgpu_pg_mask;
2353
d38ceaf9
AD
2354 return 0;
2355}
2356
0a4f2520
RZ
2357static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2358{
2359 int i, r;
2360
2361 for (i = 0; i < adev->num_ip_blocks; i++) {
2362 if (!adev->ip_blocks[i].status.sw)
2363 continue;
2364 if (adev->ip_blocks[i].status.hw)
2365 continue;
2366 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2367 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2368 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2369 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2370 if (r) {
2371 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2372 adev->ip_blocks[i].version->funcs->name, r);
2373 return r;
2374 }
2375 adev->ip_blocks[i].status.hw = true;
2376 }
2377 }
2378
2379 return 0;
2380}
2381
2382static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2383{
2384 int i, r;
2385
2386 for (i = 0; i < adev->num_ip_blocks; i++) {
2387 if (!adev->ip_blocks[i].status.sw)
2388 continue;
2389 if (adev->ip_blocks[i].status.hw)
2390 continue;
2391 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2392 if (r) {
2393 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2394 adev->ip_blocks[i].version->funcs->name, r);
2395 return r;
2396 }
2397 adev->ip_blocks[i].status.hw = true;
2398 }
2399
2400 return 0;
2401}
2402
7a3e0bb2
RZ
2403static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2404{
2405 int r = 0;
2406 int i;
80f41f84 2407 uint32_t smu_version;
7a3e0bb2
RZ
2408
2409 if (adev->asic_type >= CHIP_VEGA10) {
2410 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2411 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2412 continue;
2413
e3c1b071 2414 if (!adev->ip_blocks[i].status.sw)
2415 continue;
2416
482f0e53
ML
2417 /* no need to do the fw loading again if already done */
2418 if (adev->ip_blocks[i].status.hw == true)
2419 break;
2420
53b3f8f4 2421 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2422 r = adev->ip_blocks[i].version->funcs->resume(adev);
2423 if (r) {
2424 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2425 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2426 return r;
2427 }
2428 } else {
2429 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2430 if (r) {
2431 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2432 adev->ip_blocks[i].version->funcs->name, r);
2433 return r;
7a3e0bb2 2434 }
7a3e0bb2 2435 }
482f0e53
ML
2436
2437 adev->ip_blocks[i].status.hw = true;
2438 break;
7a3e0bb2
RZ
2439 }
2440 }
482f0e53 2441
8973d9ec
ED
2442 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2443 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2444
80f41f84 2445 return r;
7a3e0bb2
RZ
2446}
2447
5fd8518d
AG
2448static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2449{
2450 long timeout;
2451 int r, i;
2452
2453 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2454 struct amdgpu_ring *ring = adev->rings[i];
2455
2456 /* No need to set up the GPU scheduler for rings that don't need it */
2457 if (!ring || ring->no_scheduler)
2458 continue;
2459
2460 switch (ring->funcs->type) {
2461 case AMDGPU_RING_TYPE_GFX:
2462 timeout = adev->gfx_timeout;
2463 break;
2464 case AMDGPU_RING_TYPE_COMPUTE:
2465 timeout = adev->compute_timeout;
2466 break;
2467 case AMDGPU_RING_TYPE_SDMA:
2468 timeout = adev->sdma_timeout;
2469 break;
2470 default:
2471 timeout = adev->video_timeout;
2472 break;
2473 }
2474
2475 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
11f25c84 2476 ring->num_hw_submission, 0,
8ab62eda
JG
2477 timeout, adev->reset_domain->wq,
2478 ring->sched_score, ring->name,
2479 adev->dev);
5fd8518d
AG
2480 if (r) {
2481 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2482 ring->name);
2483 return r;
2484 }
2485 }
2486
d425c6f4
JZ
2487 amdgpu_xcp_update_partition_sched_list(adev);
2488
5fd8518d
AG
2489 return 0;
2490}
2491
2492
e3ecdffa
AD
2493/**
2494 * amdgpu_device_ip_init - run init for hardware IPs
2495 *
2496 * @adev: amdgpu_device pointer
2497 *
2498 * Main initialization pass for hardware IPs. The list of all the hardware
2499 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2500 * are run. sw_init initializes the software state associated with each IP
2501 * and hw_init initializes the hardware associated with each IP.
2502 * Returns 0 on success, negative error code on failure.
2503 */
06ec9070 2504static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2505{
2506 int i, r;
2507
c030f2e4 2508 r = amdgpu_ras_init(adev);
2509 if (r)
2510 return r;
2511
d38ceaf9 2512 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2513 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2514 continue;
a1255107 2515 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2516 if (r) {
a1255107
AD
2517 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2518 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2519 goto init_failed;
2c1a2784 2520 }
a1255107 2521 adev->ip_blocks[i].status.sw = true;
bfca0289 2522
c1c39032
AD
2523 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2524 /* need to do common hw init early so everything is set up for gmc */
2525 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2526 if (r) {
2527 DRM_ERROR("hw_init %d failed %d\n", i, r);
2528 goto init_failed;
2529 }
2530 adev->ip_blocks[i].status.hw = true;
2531 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2532 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2533 /* Try to reserve bad pages early */
2534 if (amdgpu_sriov_vf(adev))
2535 amdgpu_virt_exchange_data(adev);
2536
7ccfd79f 2537 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2538 if (r) {
7ccfd79f 2539 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2540 goto init_failed;
2c1a2784 2541 }
a1255107 2542 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2543 if (r) {
2544 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2545 goto init_failed;
2c1a2784 2546 }
06ec9070 2547 r = amdgpu_device_wb_init(adev);
2c1a2784 2548 if (r) {
06ec9070 2549 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2550 goto init_failed;
2c1a2784 2551 }
a1255107 2552 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2553
2554 /* right after GMC hw init, we create CSA */
02ff519e 2555 if (adev->gfx.mcbp) {
1e256e27 2556 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2557 AMDGPU_GEM_DOMAIN_VRAM |
2558 AMDGPU_GEM_DOMAIN_GTT,
2559 AMDGPU_CSA_SIZE);
2493664f
ML
2560 if (r) {
2561 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2562 goto init_failed;
2493664f
ML
2563 }
2564 }
d38ceaf9
AD
2565 }
2566 }
2567
c9ffa427 2568 if (amdgpu_sriov_vf(adev))
22c16d25 2569 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2570
533aed27
AG
2571 r = amdgpu_ib_pool_init(adev);
2572 if (r) {
2573 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2574 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2575 goto init_failed;
2576 }
2577
c8963ea4
RZ
2578 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2579 if (r)
72d3f592 2580 goto init_failed;
0a4f2520
RZ
2581
2582 r = amdgpu_device_ip_hw_init_phase1(adev);
2583 if (r)
72d3f592 2584 goto init_failed;
0a4f2520 2585
7a3e0bb2
RZ
2586 r = amdgpu_device_fw_loading(adev);
2587 if (r)
72d3f592 2588 goto init_failed;
7a3e0bb2 2589
0a4f2520
RZ
2590 r = amdgpu_device_ip_hw_init_phase2(adev);
2591 if (r)
72d3f592 2592 goto init_failed;
d38ceaf9 2593
121a2bc6
AG
2594 /*
2595 * retired pages will be loaded from eeprom and reserved here,
2596 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2597 * for some ASICs the RAS EEPROM code relies on the SMU being fully functional
2598 * for I2C communication, which is only true at this point.
b82e65a9
GC
2599 *
2600 * amdgpu_ras_recovery_init may fail, but the upper level only cares about
2601 * failures caused by a bad gpu situation and stops the amdgpu init process
2602 * accordingly. For other failure cases, it will still release all
2603 * the resources and print an error message, rather than returning a
2604 * negative value to the upper level.
121a2bc6
AG
2605 *
2606 * Note: theoretically, this should be called before all vram allocations
2607 * to protect retired pages from being abused
2608 */
b82e65a9
GC
2609 r = amdgpu_ras_recovery_init(adev);
2610 if (r)
2611 goto init_failed;
121a2bc6 2612
cfbb6b00
AG
2613 /**
2614 * In case of XGMI grab extra reference for reset domain for this device
2615 */
a4c63caf 2616 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2617 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2618 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2619 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2620
dfd0287b
LH
2621 if (WARN_ON(!hive)) {
2622 r = -ENOENT;
2623 goto init_failed;
2624 }
2625
46c67660 2626 if (!hive->reset_domain ||
2627 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2628 r = -ENOENT;
2629 amdgpu_put_xgmi_hive(hive);
2630 goto init_failed;
2631 }
2632
2633 /* Drop the early temporary reset domain we created for device */
2634 amdgpu_reset_put_reset_domain(adev->reset_domain);
2635 adev->reset_domain = hive->reset_domain;
9dfa4860 2636 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2637 }
a4c63caf
AG
2638 }
2639 }
2640
5fd8518d
AG
2641 r = amdgpu_device_init_schedulers(adev);
2642 if (r)
2643 goto init_failed;
e3c1b071 2644
2645 /* Don't init kfd if the whole hive needs to be reset during init */
84b4dd3f
PY
2646 if (!adev->gmc.xgmi.pending_reset) {
2647 kgd2kfd_init_zone_device(adev);
e3c1b071 2648 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2649 }
c6332b97 2650
bd607166
KR
2651 amdgpu_fru_get_product_info(adev);
2652
72d3f592 2653init_failed:
c6332b97 2654
72d3f592 2655 return r;
d38ceaf9
AD
2656}
2657
e3ecdffa
AD
2658/**
2659 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2660 *
2661 * @adev: amdgpu_device pointer
2662 *
2663 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2664 * this function before a GPU reset. If the value is retained after a
2665 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2666 */
06ec9070 2667static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2668{
2669 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2670}
2671
e3ecdffa
AD
2672/**
2673 * amdgpu_device_check_vram_lost - check if vram is valid
2674 *
2675 * @adev: amdgpu_device pointer
2676 *
2677 * Checks the reset magic value written to the gart pointer in VRAM.
2678 * The driver calls this after a GPU reset to see if the contents of
2679 * VRAM are lost or not.
2680 * Returns true if vram is lost, false if not.
2681 */
06ec9070 2682static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2683{
dadce777
EQ
2684 if (memcmp(adev->gart.ptr, adev->reset_magic,
2685 AMDGPU_RESET_MAGIC_NUM))
2686 return true;
2687
53b3f8f4 2688 if (!amdgpu_in_reset(adev))
dadce777
EQ
2689 return false;
2690
2691 /*
2692 * For all ASICs with baco/mode1 reset, the VRAM is
2693 * always assumed to be lost.
2694 */
2695 switch (amdgpu_asic_reset_method(adev)) {
2696 case AMD_RESET_METHOD_BACO:
2697 case AMD_RESET_METHOD_MODE1:
2698 return true;
2699 default:
2700 return false;
2701 }
0c49e0b8
CZ
2702}
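/*
 * Illustrative pairing of the two helpers above (hypothetical reset path):
 * record the magic before an ASIC reset, then test for VRAM loss afterwards.
 *
 *	amdgpu_device_fill_reset_magic(adev);
 *	... perform the ASIC reset ...
 *	vram_lost = amdgpu_device_check_vram_lost(adev);
 */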
2703
e3ecdffa 2704/**
1112a46b 2705 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2706 *
2707 * @adev: amdgpu_device pointer
b8b72130 2708 * @state: clockgating state (gate or ungate)
e3ecdffa 2709 *
e3ecdffa 2710 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2711 * set_clockgating_state callbacks are run.
2712 * Late initialization pass enabling clockgating for hardware IPs.
2713 * Fini or suspend, pass disabling clockgating for hardware IPs.
e3ecdffa
AD
2714 * Returns 0 on success, negative error code on failure.
2715 */
fdd34271 2716
5d89bb2d
LL
2717int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2718 enum amd_clockgating_state state)
d38ceaf9 2719{
1112a46b 2720 int i, j, r;
d38ceaf9 2721
4a2ba394
SL
2722 if (amdgpu_emu_mode == 1)
2723 return 0;
2724
1112a46b
RZ
2725 for (j = 0; j < adev->num_ip_blocks; j++) {
2726 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2727 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2728 continue;
47198eb7 2729 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2730 if (adev->in_s0ix &&
47198eb7
AD
2731 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2732 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2733 continue;
4a446d55 2734 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2735 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2736 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2737 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2738 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2739 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2740 /* enable clockgating to save power */
a1255107 2741 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2742 state);
4a446d55
AD
2743 if (r) {
2744 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2745 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2746 return r;
2747 }
b0b00ff1 2748 }
d38ceaf9 2749 }
06b18f61 2750
c9f96fd5
RZ
2751 return 0;
2752}
2753
5d89bb2d
LL
2754int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2755 enum amd_powergating_state state)
c9f96fd5 2756{
1112a46b 2757 int i, j, r;
06b18f61 2758
c9f96fd5
RZ
2759 if (amdgpu_emu_mode == 1)
2760 return 0;
2761
1112a46b
RZ
2762 for (j = 0; j < adev->num_ip_blocks; j++) {
2763 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2764 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2765 continue;
47198eb7 2766 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2767 if (adev->in_s0ix &&
47198eb7
AD
2768 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2769 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2770 continue;
c9f96fd5
RZ
2771 /* skip PG for VCE/UVD, it's handled specially */
2772 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2773 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2774 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2775 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2776 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2777 /* enable powergating to save power */
2778 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2779 state);
c9f96fd5
RZ
2780 if (r) {
2781 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2782 adev->ip_blocks[i].version->funcs->name, r);
2783 return r;
2784 }
2785 }
2786 }
2dc80b00
S
2787 return 0;
2788}
2789
beff74bc
AD
2790static int amdgpu_device_enable_mgpu_fan_boost(void)
2791{
2792 struct amdgpu_gpu_instance *gpu_ins;
2793 struct amdgpu_device *adev;
2794 int i, ret = 0;
2795
2796 mutex_lock(&mgpu_info.mutex);
2797
2798 /*
2799 * MGPU fan boost feature should be enabled
2800 * only when there are two or more dGPUs in
2801 * the system
2802 */
2803 if (mgpu_info.num_dgpu < 2)
2804 goto out;
2805
2806 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2807 gpu_ins = &(mgpu_info.gpu_ins[i]);
2808 adev = gpu_ins->adev;
2809 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2810 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2811 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2812 if (ret)
2813 break;
2814
2815 gpu_ins->mgpu_fan_enabled = 1;
2816 }
2817 }
2818
2819out:
2820 mutex_unlock(&mgpu_info.mutex);
2821
2822 return ret;
2823}
2824
e3ecdffa
AD
2825/**
2826 * amdgpu_device_ip_late_init - run late init for hardware IPs
2827 *
2828 * @adev: amdgpu_device pointer
2829 *
2830 * Late initialization pass for hardware IPs. The list of all the hardware
2831 * IPs that make up the asic is walked and the late_init callbacks are run.
2832 * late_init covers any special initialization that an IP requires
2833 * after all of them have been initialized or something that needs to happen
2834 * late in the init process.
2835 * Returns 0 on success, negative error code on failure.
2836 */
06ec9070 2837static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2838{
60599a03 2839 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2840 int i = 0, r;
2841
2842 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2843 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2844 continue;
2845 if (adev->ip_blocks[i].version->funcs->late_init) {
2846 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2847 if (r) {
2848 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2849 adev->ip_blocks[i].version->funcs->name, r);
2850 return r;
2851 }
2dc80b00 2852 }
73f847db 2853 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2854 }
2855
867e24ca 2856 r = amdgpu_ras_late_init(adev);
2857 if (r) {
2858 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2859 return r;
2860 }
2861
a891d239
DL
2862 amdgpu_ras_set_error_query_ready(adev, true);
2863
1112a46b
RZ
2864 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2865 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2866
06ec9070 2867 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2868
beff74bc
AD
2869 r = amdgpu_device_enable_mgpu_fan_boost();
2870 if (r)
2871 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2872
4da8b639 2873 /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
47fc644f
SS
2874 if (amdgpu_passthrough(adev) &&
2875 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2876 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2877 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2878
2879 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2880 mutex_lock(&mgpu_info.mutex);
2881
2882 /*
2883 * Reset device p-state to low as this was booted with high.
2884 *
2885 * This should be performed only after all devices from the same
2886 * hive get initialized.
2887 *
2888 * However, it's unknown how many device in the hive in advance.
2889 * As this is counted one by one during devices initializations.
2890 *
2891 * So, we wait for all XGMI interlinked devices initialized.
2892 * This may bring some delays as those devices may come from
2893 * different hives. But that should be OK.
2894 */
2895 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2896 for (i = 0; i < mgpu_info.num_gpu; i++) {
2897 gpu_instance = &(mgpu_info.gpu_ins[i]);
2898 if (gpu_instance->adev->flags & AMD_IS_APU)
2899 continue;
2900
d84a430d
JK
2901 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2902 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2903 if (r) {
2904 DRM_ERROR("pstate setting failed (%d).\n", r);
2905 break;
2906 }
2907 }
2908 }
2909
2910 mutex_unlock(&mgpu_info.mutex);
2911 }
2912
d38ceaf9
AD
2913 return 0;
2914}
2915
613aa3ea
LY
2916/**
2917 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2918 *
2919 * @adev: amdgpu_device pointer
2920 *
2921 * For ASICs that need to disable the SMC first
2922 */
2923static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2924{
2925 int i, r;
2926
2927 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2928 return;
2929
2930 for (i = 0; i < adev->num_ip_blocks; i++) {
2931 if (!adev->ip_blocks[i].status.hw)
2932 continue;
2933 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2934 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2935 /* XXX handle errors */
2936 if (r) {
2937 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2938 adev->ip_blocks[i].version->funcs->name, r);
2939 }
2940 adev->ip_blocks[i].status.hw = false;
2941 break;
2942 }
2943 }
2944}
2945
e9669fb7 2946static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2947{
2948 int i, r;
2949
e9669fb7
AG
2950 for (i = 0; i < adev->num_ip_blocks; i++) {
2951 if (!adev->ip_blocks[i].version->funcs->early_fini)
2952 continue;
5278a159 2953
e9669fb7
AG
2954 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2955 if (r) {
2956 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2957 adev->ip_blocks[i].version->funcs->name, r);
2958 }
2959 }
c030f2e4 2960
05df1f01 2961 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2962 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2963
7270e895
TY
2964 amdgpu_amdkfd_suspend(adev, false);
2965
613aa3ea
LY
2966 /* Workaround for ASICs that need to disable the SMC first */
2967 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2968
d38ceaf9 2969 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2970 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2971 continue;
8201a67a 2972
a1255107 2973 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2974 /* XXX handle errors */
2c1a2784 2975 if (r) {
a1255107
AD
2976 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2977 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2978 }
8201a67a 2979
a1255107 2980 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2981 }
2982
6effad8a
GC
2983 if (amdgpu_sriov_vf(adev)) {
2984 if (amdgpu_virt_release_full_gpu(adev, false))
2985 DRM_ERROR("failed to release exclusive mode on fini\n");
2986 }
2987
e9669fb7
AG
2988 return 0;
2989}
2990
2991/**
2992 * amdgpu_device_ip_fini - run fini for hardware IPs
2993 *
2994 * @adev: amdgpu_device pointer
2995 *
2996 * Main teardown pass for hardware IPs. The list of all the hardware
2997 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2998 * are run. hw_fini tears down the hardware associated with each IP
2999 * and sw_fini tears down any software state associated with each IP.
3000 * Returns 0 on success, negative error code on failure.
3001 */
3002static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3003{
3004 int i, r;
3005
3006 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3007 amdgpu_virt_release_ras_err_handler_data(adev);
3008
e9669fb7
AG
3009 if (adev->gmc.xgmi.num_physical_nodes > 1)
3010 amdgpu_xgmi_remove_device(adev);
3011
c004d44e 3012 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 3013
d38ceaf9 3014 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3015 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 3016 continue;
c12aba3a
ML
3017
3018 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 3019 amdgpu_ucode_free_bo(adev);
1e256e27 3020 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 3021 amdgpu_device_wb_fini(adev);
7ccfd79f 3022 amdgpu_device_mem_scratch_fini(adev);
533aed27 3023 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
3024 }
3025
a1255107 3026 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 3027 /* XXX handle errors */
2c1a2784 3028 if (r) {
a1255107
AD
3029 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3030 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3031 }
a1255107
AD
3032 adev->ip_blocks[i].status.sw = false;
3033 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
3034 }
3035
a6dcfd9c 3036 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3037 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 3038 continue;
a1255107
AD
3039 if (adev->ip_blocks[i].version->funcs->late_fini)
3040 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3041 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
3042 }
3043
c030f2e4 3044 amdgpu_ras_fini(adev);
3045
d38ceaf9
AD
3046 return 0;
3047}
3048
e3ecdffa 3049/**
beff74bc 3050 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 3051 *
1112a46b 3052 * @work: work_struct.
e3ecdffa 3053 */
beff74bc 3054static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
3055{
3056 struct amdgpu_device *adev =
beff74bc 3057 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
3058 int r;
3059
3060 r = amdgpu_ib_ring_tests(adev);
3061 if (r)
3062 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
3063}
3064
1e317b99
RZ
3065static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3066{
3067 struct amdgpu_device *adev =
3068 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3069
90a92662
MD
3070 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3071 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3072
3073 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3074 adev->gfx.gfx_off_state = true;
1e317b99
RZ
3075}
3076
e3ecdffa 3077/**
e7854a03 3078 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3079 *
3080 * @adev: amdgpu_device pointer
3081 *
3082 * Main suspend function for hardware IPs. The list of all the hardware
3083 * IPs that make up the asic is walked, clockgating is disabled and the
3084 * suspend callbacks are run. suspend puts the hardware and software state
3085 * in each IP into a state suitable for suspend.
3086 * Returns 0 on success, negative error code on failure.
3087 */
e7854a03
AD
3088static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3089{
3090 int i, r;
3091
50ec83f0
AD
3092 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3093 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3094
b31d6ada
EQ
3095 /*
3096 * Per PMFW team's suggestion, driver needs to handle gfxoff
3097 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3098 * scenario. Add the missing df cstate disablement here.
3099 */
3100 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3101 dev_warn(adev->dev, "Failed to disallow df cstate");
3102
e7854a03
AD
3103 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3104 if (!adev->ip_blocks[i].status.valid)
3105 continue;
2b9f7848 3106
e7854a03 3107 /* displays are handled separately */
2b9f7848
ND
3108 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3109 continue;
3110
3111 /* XXX handle errors */
3112 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3113 /* XXX handle errors */
3114 if (r) {
3115 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3116 adev->ip_blocks[i].version->funcs->name, r);
3117 return r;
e7854a03 3118 }
2b9f7848
ND
3119
3120 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3121 }
3122
e7854a03
AD
3123 return 0;
3124}
3125
3126/**
3127 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3128 *
3129 * @adev: amdgpu_device pointer
3130 *
3131 * Main suspend function for hardware IPs. The list of all the hardware
3132 * IPs that make up the asic is walked, clockgating is disabled and the
3133 * suspend callbacks are run. suspend puts the hardware and software state
3134 * in each IP into a state suitable for suspend.
3135 * Returns 0 on success, negative error code on failure.
3136 */
3137static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3138{
3139 int i, r;
3140
557f42a2 3141 if (adev->in_s0ix)
bc143d8b 3142 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3143
d38ceaf9 3144 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3145 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3146 continue;
e7854a03
AD
3147 /* displays are handled in phase1 */
3148 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3149 continue;
bff77e86
LM
3150 /* PSP lost connection when err_event_athub occurs */
3151 if (amdgpu_ras_intr_triggered() &&
3152 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3153 adev->ip_blocks[i].status.hw = false;
3154 continue;
3155 }
e3c1b071 3156
3157 /* skip unnecessary suspend if we have not initialized them yet */
3158 if (adev->gmc.xgmi.pending_reset &&
3159 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3160 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3161 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3162 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3163 adev->ip_blocks[i].status.hw = false;
3164 continue;
3165 }
557f42a2 3166
afa6646b 3167 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3168 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3169 * like at runtime. PSP is also part of the always on hardware
3170 * so no need to suspend it.
3171 */
557f42a2 3172 if (adev->in_s0ix &&
32ff160d 3173 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3174 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3175 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3176 continue;
3177
2a7798ea
AD
3178 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3179 if (adev->in_s0ix &&
3180 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3181 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3182 continue;
3183
e11c7750
TH
3184 /* During cold boot, swPSP provides the IMU and RLC FW binaries to TOS.
3185 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3186 * from this location and RLC Autoload automatically also gets loaded
3187 * from here based on PMFW -> PSP message during re-init sequence.
3188 * Therefore, the psp suspend & resume should be skipped to avoid destroying
3189 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3190 */
3191 if (amdgpu_in_reset(adev) &&
3192 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3193 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3194 continue;
3195
d38ceaf9 3196 /* XXX handle errors */
a1255107 3197 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3198 /* XXX handle errors */
2c1a2784 3199 if (r) {
a1255107
AD
3200 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3201 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3202 }
876923fb 3203 adev->ip_blocks[i].status.hw = false;
a3a09142 3204 /* handle putting the SMC in the appropriate state */
47fc644f 3205 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3206 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3207 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3208 if (r) {
3209 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3210 adev->mp1_state, r);
3211 return r;
3212 }
a3a09142
AD
3213 }
3214 }
d38ceaf9
AD
3215 }
3216
3217 return 0;
3218}
3219
e7854a03
AD
3220/**
3221 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3222 *
3223 * @adev: amdgpu_device pointer
3224 *
3225 * Main suspend function for hardware IPs. The list of all the hardware
3226 * IPs that make up the asic is walked, clockgating is disabled and the
3227 * suspend callbacks are run. suspend puts the hardware and software state
3228 * in each IP into a state suitable for suspend.
3229 * Returns 0 on success, negative error code on failure.
3230 */
3231int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3232{
3233 int r;
3234
3c73683c
JC
3235 if (amdgpu_sriov_vf(adev)) {
3236 amdgpu_virt_fini_data_exchange(adev);
e7819644 3237 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3238 }
e7819644 3239
e7854a03
AD
3240 r = amdgpu_device_ip_suspend_phase1(adev);
3241 if (r)
3242 return r;
3243 r = amdgpu_device_ip_suspend_phase2(adev);
3244
e7819644
YT
3245 if (amdgpu_sriov_vf(adev))
3246 amdgpu_virt_release_full_gpu(adev, false);
3247
e7854a03
AD
3248 return r;
3249}
3250
06ec9070 3251static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3252{
3253 int i, r;
3254
2cb681b6 3255 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3256 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3257 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3258 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3259 AMD_IP_BLOCK_TYPE_IH,
3260 };
a90ad3c2 3261
95ea3dbc 3262 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3263 int j;
3264 struct amdgpu_ip_block *block;
a90ad3c2 3265
4cd2a96d
J
3266 block = &adev->ip_blocks[i];
3267 block->status.hw = false;
2cb681b6 3268
4cd2a96d 3269 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3270
4cd2a96d 3271 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3272 !block->status.valid)
3273 continue;
3274
3275 r = block->version->funcs->hw_init(adev);
0aaeefcc 3276 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
3277 if (r)
3278 return r;
482f0e53 3279 block->status.hw = true;
a90ad3c2
ML
3280 }
3281 }
3282
3283 return 0;
3284}
3285
06ec9070 3286static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3287{
3288 int i, r;
3289
2cb681b6
ML
3290 static enum amd_ip_block_type ip_order[] = {
3291 AMD_IP_BLOCK_TYPE_SMC,
3292 AMD_IP_BLOCK_TYPE_DCE,
3293 AMD_IP_BLOCK_TYPE_GFX,
3294 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3295 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3296 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3297 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3298 AMD_IP_BLOCK_TYPE_VCN,
3299 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3300 };
a90ad3c2 3301
2cb681b6
ML
3302 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3303 int j;
3304 struct amdgpu_ip_block *block;
a90ad3c2 3305
2cb681b6
ML
3306 for (j = 0; j < adev->num_ip_blocks; j++) {
3307 block = &adev->ip_blocks[j];
3308
3309 if (block->version->type != ip_order[i] ||
482f0e53
ML
3310 !block->status.valid ||
3311 block->status.hw)
2cb681b6
ML
3312 continue;
3313
895bd048
JZ
3314 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3315 r = block->version->funcs->resume(adev);
3316 else
3317 r = block->version->funcs->hw_init(adev);
3318
0aaeefcc 3319 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
3320 if (r)
3321 return r;
482f0e53 3322 block->status.hw = true;
a90ad3c2
ML
3323 }
3324 }
3325
3326 return 0;
3327}
3328
e3ecdffa
AD
3329/**
3330 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3331 *
3332 * @adev: amdgpu_device pointer
3333 *
3334 * First resume function for hardware IPs. The list of all the hardware
3335 * IPs that make up the asic is walked and the resume callbacks are run for
3336 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3337 * after a suspend and updates the software state as necessary. This
3338 * function is also used for restoring the GPU after a GPU reset.
3339 * Returns 0 on success, negative error code on failure.
3340 */
06ec9070 3341static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3342{
3343 int i, r;
3344
a90ad3c2 3345 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3346 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3347 continue;
a90ad3c2 3348 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3349 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3350 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3351 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3352
fcf0649f
CZ
3353 r = adev->ip_blocks[i].version->funcs->resume(adev);
3354 if (r) {
3355 DRM_ERROR("resume of IP block <%s> failed %d\n",
3356 adev->ip_blocks[i].version->funcs->name, r);
3357 return r;
3358 }
482f0e53 3359 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3360 }
3361 }
3362
3363 return 0;
3364}
3365
e3ecdffa
AD
3366/**
3367 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3368 *
3369 * @adev: amdgpu_device pointer
3370 *
 3371 * Second resume function for hardware IPs. The list of all the hardware
3372 * IPs that make up the asic is walked and the resume callbacks are run for
3373 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3374 * functional state after a suspend and updates the software state as
3375 * necessary. This function is also used for restoring the GPU after a GPU
3376 * reset.
3377 * Returns 0 on success, negative error code on failure.
3378 */
06ec9070 3379static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3380{
3381 int i, r;
3382
3383 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3384 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3385 continue;
fcf0649f 3386 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3387 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3388 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3389 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3390 continue;
a1255107 3391 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3392 if (r) {
a1255107
AD
3393 DRM_ERROR("resume of IP block <%s> failed %d\n",
3394 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3395 return r;
2c1a2784 3396 }
482f0e53 3397 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3398 }
3399
3400 return 0;
3401}
3402
e3ecdffa
AD
3403/**
3404 * amdgpu_device_ip_resume - run resume for hardware IPs
3405 *
3406 * @adev: amdgpu_device pointer
3407 *
3408 * Main resume function for hardware IPs. The hardware IPs
3409 * are split into two resume functions because they are
 3410 * also used in recovering from a GPU reset and some additional
 3411 * steps need to be taken between them. In this case (S3/S4) they are
3412 * run sequentially.
3413 * Returns 0 on success, negative error code on failure.
3414 */
06ec9070 3415static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3416{
3417 int r;
3418
f2206741
AL
3419 if (!adev->in_s0ix) {
3420 r = amdgpu_amdkfd_resume_iommu(adev);
3421 if (r)
3422 return r;
3423 }
9cec53c1 3424
06ec9070 3425 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3426 if (r)
3427 return r;
7a3e0bb2
RZ
3428
3429 r = amdgpu_device_fw_loading(adev);
3430 if (r)
3431 return r;
3432
06ec9070 3433 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3434
3435 return r;
3436}
3437
e3ecdffa
AD
3438/**
3439 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3440 *
3441 * @adev: amdgpu_device pointer
3442 *
3443 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3444 */
4e99a44e 3445static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3446{
6867e1b5
ML
3447 if (amdgpu_sriov_vf(adev)) {
3448 if (adev->is_atom_fw) {
58ff791a 3449 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3450 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3451 } else {
3452 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3453 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3454 }
3455
3456 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3457 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3458 }
048765ad
AR
3459}
3460
e3ecdffa
AD
3461/**
3462 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3463 *
3464 * @asic_type: AMD asic type
3465 *
 3466 * Check if there is DC (new modesetting infrastructure) support for an asic.
3467 * returns true if DC has support, false if not.
3468 */
4562236b
HW
3469bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3470{
3471 switch (asic_type) {
0637d417
AD
3472#ifdef CONFIG_DRM_AMDGPU_SI
3473 case CHIP_HAINAN:
3474#endif
3475 case CHIP_TOPAZ:
3476 /* chips with no display hardware */
3477 return false;
4562236b 3478#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3479 case CHIP_TAHITI:
3480 case CHIP_PITCAIRN:
3481 case CHIP_VERDE:
3482 case CHIP_OLAND:
2d32ffd6
AD
3483 /*
3484 * We have systems in the wild with these ASICs that require
3485 * LVDS and VGA support which is not supported with DC.
3486 *
3487 * Fallback to the non-DC driver here by default so as not to
3488 * cause regressions.
3489 */
3490#if defined(CONFIG_DRM_AMD_DC_SI)
3491 return amdgpu_dc > 0;
3492#else
3493 return false;
64200c46 3494#endif
4562236b 3495 case CHIP_BONAIRE:
0d6fbccb 3496 case CHIP_KAVERI:
367e6687
AD
3497 case CHIP_KABINI:
3498 case CHIP_MULLINS:
d9fda248
HW
3499 /*
3500 * We have systems in the wild with these ASICs that require
b5a0168e 3501 * VGA support which is not supported with DC.
d9fda248
HW
3502 *
3503 * Fallback to the non-DC driver here by default so as not to
3504 * cause regressions.
3505 */
3506 return amdgpu_dc > 0;
f7f12b25 3507 default:
fd187853 3508 return amdgpu_dc != 0;
f7f12b25 3509#else
4562236b 3510 default:
93b09a9a 3511 if (amdgpu_dc > 0)
044a48f4 3512 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3513 "but isn't supported by ASIC, ignoring\n");
4562236b 3514 return false;
f7f12b25 3515#endif
4562236b
HW
3516 }
3517}
3518
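/*
 * Illustrative usage (a sketch, assuming amdgpu_dc is exposed as the "dc"
 * module parameter): dc=1 opts in to DC on the legacy SI/CIK parts handled
 * above, dc=0 keeps the non-DC path, and the default lets the per-ASIC rules
 * above decide, e.g.:
 *
 *   modprobe amdgpu dc=1
 */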
3519/**
3520 * amdgpu_device_has_dc_support - check if dc is supported
3521 *
982a820b 3522 * @adev: amdgpu_device pointer
4562236b
HW
3523 *
3524 * Returns true for supported, false for not supported
3525 */
3526bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3527{
25263da3 3528 if (adev->enable_virtual_display ||
abaf210c 3529 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3530 return false;
3531
4562236b
HW
3532 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3533}
3534
d4535e2c
AG
3535static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3536{
3537 struct amdgpu_device *adev =
3538 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3539 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3540
c6a6e2db
AG
3541 /* It's a bug to not have a hive within this function */
3542 if (WARN_ON(!hive))
3543 return;
3544
3545 /*
3546 * Use task barrier to synchronize all xgmi reset works across the
3547 * hive. task_barrier_enter and task_barrier_exit will block
3548 * until all the threads running the xgmi reset works reach
3549 * those points. task_barrier_full will do both blocks.
3550 */
3551 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3552
3553 task_barrier_enter(&hive->tb);
4a580877 3554 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3555
3556 if (adev->asic_reset_res)
3557 goto fail;
3558
3559 task_barrier_exit(&hive->tb);
4a580877 3560 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3561
3562 if (adev->asic_reset_res)
3563 goto fail;
43c4d576 3564
5e67bba3 3565 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3566 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3567 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3568 } else {
3569
3570 task_barrier_full(&hive->tb);
3571 adev->asic_reset_res = amdgpu_asic_reset(adev);
3572 }
ce316fa5 3573
c6a6e2db 3574fail:
d4535e2c 3575 if (adev->asic_reset_res)
fed184e9 3576 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3577 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3578 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3579}
3580
71f98027
AD
3581static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3582{
3583 char *input = amdgpu_lockup_timeout;
3584 char *timeout_setting = NULL;
3585 int index = 0;
3586 long timeout;
3587 int ret = 0;
3588
3589 /*
67387dfe
AD
 3590 * By default the timeout for non-compute jobs is 10000
 3591 * and 60000 for compute jobs.
 71f98027 3592 * In SR-IOV or passthrough mode, the timeout for compute
 b7b2a316 3593 * jobs is 60000 by default.
71f98027
AD
3594 */
3595 adev->gfx_timeout = msecs_to_jiffies(10000);
3596 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3597 if (amdgpu_sriov_vf(adev))
3598 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3599 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3600 else
67387dfe 3601 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3602
f440ff44 3603 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3604 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3605 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3606 ret = kstrtol(timeout_setting, 0, &timeout);
3607 if (ret)
3608 return ret;
3609
3610 if (timeout == 0) {
3611 index++;
3612 continue;
3613 } else if (timeout < 0) {
3614 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3615 dev_warn(adev->dev, "lockup timeout disabled");
3616 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3617 } else {
3618 timeout = msecs_to_jiffies(timeout);
3619 }
3620
3621 switch (index++) {
3622 case 0:
3623 adev->gfx_timeout = timeout;
3624 break;
3625 case 1:
3626 adev->compute_timeout = timeout;
3627 break;
3628 case 2:
3629 adev->sdma_timeout = timeout;
3630 break;
3631 case 3:
3632 adev->video_timeout = timeout;
3633 break;
3634 default:
3635 break;
3636 }
3637 }
3638 /*
3639 * There is only one value specified and
3640 * it should apply to all non-compute jobs.
3641 */
bcccee89 3642 if (index == 1) {
71f98027 3643 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3644 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3645 adev->compute_timeout = adev->gfx_timeout;
3646 }
71f98027
AD
3647 }
3648
3649 return ret;
3650}
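/*
 * Illustrative parameter strings for the parser above (a sketch, assuming
 * amdgpu_lockup_timeout is exposed as the "lockup_timeout" module parameter):
 *
 *   lockup_timeout=10000                  one value: applies to all
 *                                         non-compute queues (and to compute
 *                                         as well under SR-IOV/passthrough)
 *   lockup_timeout=10000,60000,10000,-1   gfx, compute, sdma, video in that
 *                                         order; 0 keeps the default and a
 *                                         negative value disables the timeout
 */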
d4535e2c 3651
4a74c38c
PY
3652/**
3653 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3654 *
3655 * @adev: amdgpu_device pointer
3656 *
 3657 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3658 */
3659static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3660{
3661 struct iommu_domain *domain;
3662
3663 domain = iommu_get_domain_for_dev(adev->dev);
3664 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3665 adev->ram_is_direct_mapped = true;
3666}
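/*
 * Worked example (a sketch): booting with the IOMMU disabled, or in
 * passthrough mode (e.g. iommu=pt, which typically yields an
 * IOMMU_DOMAIN_IDENTITY domain), leaves no translating domain for the
 * device, so ram_is_direct_mapped is set and system RAM addresses seen by
 * the GPU can be assumed to be unremapped.
 */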
3667
77f3a5cd
ND
3668static const struct attribute *amdgpu_dev_attributes[] = {
3669 &dev_attr_product_name.attr,
3670 &dev_attr_product_number.attr,
3671 &dev_attr_serial_number.attr,
3672 &dev_attr_pcie_replay_count.attr,
3673 NULL
3674};
3675
02ff519e
AD
3676static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3677{
3678 if (amdgpu_mcbp == 1)
3679 adev->gfx.mcbp = true;
3680
50a7c876
AD
3681 if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
3682 (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
3683 adev->gfx.num_gfx_rings)
3684 adev->gfx.mcbp = true;
3685
02ff519e
AD
3686 if (amdgpu_sriov_vf(adev))
3687 adev->gfx.mcbp = true;
3688
3689 if (adev->gfx.mcbp)
3690 DRM_INFO("MCBP is enabled\n");
3691}
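/*
 * Illustrative (a sketch, assuming amdgpu_mcbp is exposed as the "mcbp"
 * module parameter): "modprobe amdgpu mcbp=1" forces mid-command-buffer
 * preemption on; otherwise the code above enables it automatically for
 * gfx9 parts with gfx rings and for SR-IOV VFs.
 */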
3692
d38ceaf9
AD
3693/**
3694 * amdgpu_device_init - initialize the driver
3695 *
3696 * @adev: amdgpu_device pointer
d38ceaf9
AD
3697 * @flags: driver flags
3698 *
3699 * Initializes the driver info and hw (all asics).
3700 * Returns 0 for success or an error on failure.
3701 * Called at driver startup.
3702 */
3703int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3704 uint32_t flags)
3705{
8aba21b7
LT
3706 struct drm_device *ddev = adev_to_drm(adev);
3707 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3708 int r, i;
b98c6299 3709 bool px = false;
95844d20 3710 u32 max_MBps;
59e9fff1 3711 int tmp;
d38ceaf9
AD
3712
3713 adev->shutdown = false;
d38ceaf9 3714 adev->flags = flags;
4e66d7d2
YZ
3715
3716 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3717 adev->asic_type = amdgpu_force_asic_type;
3718 else
3719 adev->asic_type = flags & AMD_ASIC_MASK;
3720
d38ceaf9 3721 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3722 if (amdgpu_emu_mode == 1)
8bdab6bb 3723 adev->usec_timeout *= 10;
770d13b1 3724 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3725 adev->accel_working = false;
3726 adev->num_rings = 0;
68ce8b24 3727 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3728 adev->mman.buffer_funcs = NULL;
3729 adev->mman.buffer_funcs_ring = NULL;
3730 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3731 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3732 adev->gmc.gmc_funcs = NULL;
7bd939d0 3733 adev->harvest_ip_mask = 0x0;
f54d1867 3734 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3735 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3736
3737 adev->smc_rreg = &amdgpu_invalid_rreg;
3738 adev->smc_wreg = &amdgpu_invalid_wreg;
3739 adev->pcie_rreg = &amdgpu_invalid_rreg;
3740 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3741 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3742 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3743 adev->pciep_rreg = &amdgpu_invalid_rreg;
3744 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3745 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3746 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3747 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3748 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3749 adev->didt_rreg = &amdgpu_invalid_rreg;
3750 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3751 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3752 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3753 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3754 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3755
3e39ab90
AD
3756 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3757 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3758 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3759
 3760 	/* mutex initializations are all done here so we
 3761 	 * can recall functions without having locking issues */
0e5ca0d1 3762 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3763 mutex_init(&adev->pm.mutex);
3764 mutex_init(&adev->gfx.gpu_clock_mutex);
3765 mutex_init(&adev->srbm_mutex);
b8866c26 3766 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3767 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3768 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3769 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3770 mutex_init(&adev->mn_lock);
e23b74aa 3771 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3772 hash_init(adev->mn_hash);
32eaeae0 3773 mutex_init(&adev->psp.mutex);
bd052211 3774 mutex_init(&adev->notifier_lock);
8cda7a4f 3775 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3776 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3777
ab3b9de6 3778 amdgpu_device_init_apu_flags(adev);
9f6a7857 3779
912dfc84
EQ
3780 r = amdgpu_device_check_arguments(adev);
3781 if (r)
3782 return r;
d38ceaf9 3783
d38ceaf9
AD
3784 spin_lock_init(&adev->mmio_idx_lock);
3785 spin_lock_init(&adev->smc_idx_lock);
3786 spin_lock_init(&adev->pcie_idx_lock);
3787 spin_lock_init(&adev->uvd_ctx_idx_lock);
3788 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3789 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3790 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3791 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3792 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3793
0c4e7fa5
CZ
3794 INIT_LIST_HEAD(&adev->shadow_list);
3795 mutex_init(&adev->shadow_list_lock);
3796
655ce9cb 3797 INIT_LIST_HEAD(&adev->reset_list);
3798
6492e1b0 3799 INIT_LIST_HEAD(&adev->ras_list);
3800
beff74bc
AD
3801 INIT_DELAYED_WORK(&adev->delayed_init_work,
3802 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3803 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3804 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3805
d4535e2c
AG
3806 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3807
d23ee13f 3808 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3809 adev->gfx.gfx_off_residency = 0;
3810 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3811 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3812
b265bdbd
EQ
3813 atomic_set(&adev->throttling_logging_enabled, 1);
3814 /*
3815 * If throttling continues, logging will be performed every minute
3816 * to avoid log flooding. "-1" is subtracted since the thermal
3817 * throttling interrupt comes every second. Thus, the total logging
 3818 	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3819 * for throttling interrupt) = 60 seconds.
3820 */
3821 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3822 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3823
0fa49558
AX
3824 /* Registers mapping */
3825 /* TODO: block userspace mapping of io register */
da69c161
KW
3826 if (adev->asic_type >= CHIP_BONAIRE) {
3827 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3828 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3829 } else {
3830 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3831 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3832 }
d38ceaf9 3833
6c08e0ef
EQ
3834 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3835 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3836
d38ceaf9
AD
3837 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3838 if (adev->rmmio == NULL) {
3839 return -ENOMEM;
3840 }
3841 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3842 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3843
436afdfa
PY
3844 /*
 3845 	 * Reset domain needs to be present early, before the XGMI hive is
 3846 	 * discovered (if any) and initialized, so the reset sem and in-GPU reset
 3847 	 * flag can be used early on during init and before calling RREG32.
3848 */
3849 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3850 if (!adev->reset_domain)
3851 return -ENOMEM;
3852
3aa0115d
ML
3853 /* detect hw virtualization here */
3854 amdgpu_detect_virtualization(adev);
3855
04e85958
TL
3856 amdgpu_device_get_pcie_info(adev);
3857
dffa11b4
ML
3858 r = amdgpu_device_get_job_timeout_settings(adev);
3859 if (r) {
3860 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3861 return r;
a190d1c7
XY
3862 }
3863
d38ceaf9 3864 /* early init functions */
06ec9070 3865 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3866 if (r)
4ef87d8f 3867 return r;
d38ceaf9 3868
02ff519e
AD
3869 amdgpu_device_set_mcbp(adev);
3870
b7cdb41e
ML
3871 /* Get rid of things like offb */
3872 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3873 if (r)
3874 return r;
3875
4d33e704
SK
3876 /* Enable TMZ based on IP_VERSION */
3877 amdgpu_gmc_tmz_set(adev);
3878
957b0787 3879 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3880 /* Need to get xgmi info early to decide the reset behavior*/
3881 if (adev->gmc.xgmi.supported) {
3882 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3883 if (r)
3884 return r;
3885 }
3886
8e6d0b69 3887 /* enable PCIE atomic ops */
b4520bfd
GW
3888 if (amdgpu_sriov_vf(adev)) {
3889 if (adev->virt.fw_reserve.p_pf2vf)
3890 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3891 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3892 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
 3893 	/* APUs with gfx9 onwards don't rely on PCIe atomics; rather, their
 3894 	 * internal path natively supports atomics, so set have_atomics_support to true.
3895 */
b4520bfd
GW
3896 } else if ((adev->flags & AMD_IS_APU) &&
3897 (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
0e768043 3898 adev->have_atomics_support = true;
b4520bfd 3899 } else {
8e6d0b69 3900 adev->have_atomics_support =
3901 !pci_enable_atomic_ops_to_root(adev->pdev,
3902 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3903 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
3904 }
3905
8e6d0b69 3906 if (!adev->have_atomics_support)
3907 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3908
6585661d
OZ
3909 /* doorbell bar mapping and doorbell index init*/
3910 amdgpu_device_doorbell_init(adev);
3911
9475a943
SL
3912 if (amdgpu_emu_mode == 1) {
3913 /* post the asic on emulation mode */
3914 emu_soc_asic_init(adev);
bfca0289 3915 goto fence_driver_init;
9475a943 3916 }
bfca0289 3917
04442bf7
LL
3918 amdgpu_reset_init(adev);
3919
4e99a44e 3920 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
3921 if (adev->bios)
3922 amdgpu_device_detect_sriov_bios(adev);
048765ad 3923
95e8e59e
AD
3924 /* check if we need to reset the asic
3925 * E.g., driver was not cleanly unloaded previously, etc.
3926 */
f14899fd 3927 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3928 if (adev->gmc.xgmi.num_physical_nodes) {
3929 dev_info(adev->dev, "Pending hive reset.\n");
3930 adev->gmc.xgmi.pending_reset = true;
3931 /* Only need to init necessary block for SMU to handle the reset */
3932 for (i = 0; i < adev->num_ip_blocks; i++) {
3933 if (!adev->ip_blocks[i].status.valid)
3934 continue;
3935 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3936 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3937 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3938 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3939 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3940 adev->ip_blocks[i].version->funcs->name);
3941 adev->ip_blocks[i].status.hw = true;
3942 }
3943 }
3944 } else {
59e9fff1 3945 tmp = amdgpu_reset_method;
3946 /* It should do a default reset when loading or reloading the driver,
3947 * regardless of the module parameter reset_method.
3948 */
3949 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3950 r = amdgpu_asic_reset(adev);
59e9fff1 3951 amdgpu_reset_method = tmp;
e3c1b071 3952 if (r) {
3953 dev_err(adev->dev, "asic reset on init failed\n");
3954 goto failed;
3955 }
95e8e59e
AD
3956 }
3957 }
3958
d38ceaf9 3959 /* Post card if necessary */
39c640c0 3960 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3961 if (!adev->bios) {
bec86378 3962 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3963 r = -EINVAL;
3964 goto failed;
d38ceaf9 3965 }
bec86378 3966 DRM_INFO("GPU posting now...\n");
4d2997ab 3967 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3968 if (r) {
3969 dev_err(adev->dev, "gpu post error!\n");
3970 goto failed;
3971 }
d38ceaf9
AD
3972 }
3973
9535a86a
SZ
3974 if (adev->bios) {
3975 if (adev->is_atom_fw) {
3976 /* Initialize clocks */
3977 r = amdgpu_atomfirmware_get_clock_info(adev);
3978 if (r) {
3979 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3980 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3981 goto failed;
3982 }
3983 } else {
3984 /* Initialize clocks */
3985 r = amdgpu_atombios_get_clock_info(adev);
3986 if (r) {
3987 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3988 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3989 goto failed;
3990 }
3991 /* init i2c buses */
3992 if (!amdgpu_device_has_dc_support(adev))
3993 amdgpu_atombios_i2c_init(adev);
a5bde2f9 3994 }
2c1a2784 3995 }
d38ceaf9 3996
bfca0289 3997fence_driver_init:
d38ceaf9 3998 /* Fence driver */
067f44c8 3999 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 4000 if (r) {
067f44c8 4001 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 4002 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 4003 goto failed;
2c1a2784 4004 }
d38ceaf9
AD
4005
4006 /* init the mode config */
4a580877 4007 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 4008
06ec9070 4009 r = amdgpu_device_ip_init(adev);
d38ceaf9 4010 if (r) {
06ec9070 4011 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 4012 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 4013 goto release_ras_con;
d38ceaf9
AD
4014 }
4015
8d35a259
LG
4016 amdgpu_fence_driver_hw_init(adev);
4017
d69b8971
YZ
4018 dev_info(adev->dev,
4019 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
4020 adev->gfx.config.max_shader_engines,
4021 adev->gfx.config.max_sh_per_se,
4022 adev->gfx.config.max_cu_per_sh,
4023 adev->gfx.cu_info.number);
4024
d38ceaf9
AD
4025 adev->accel_working = true;
4026
e59c0205
AX
4027 amdgpu_vm_check_compute_bug(adev);
4028
95844d20
MO
4029 /* Initialize the buffer migration limit. */
4030 if (amdgpu_moverate >= 0)
4031 max_MBps = amdgpu_moverate;
4032 else
4033 max_MBps = 8; /* Allow 8 MB/s. */
4034 /* Get a log2 for easy divisions. */
4035 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
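	/*
	 * Worked example for the log2 above: with the default max_MBps = 8,
	 * log2_max_MBps = ilog2(8) = 3, so consumers can replace a divide or
	 * multiply by the MB/s budget with a 3-bit shift. The max(1u, ...)
	 * guard keeps ilog2() well defined if amdgpu_moverate is set to 0,
	 * which then amounts to a 1 MB/s floor.
	 */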
4036
184d8384
LL
4037 r = amdgpu_atombios_sysfs_init(adev);
4038 if (r)
4039 drm_err(&adev->ddev,
4040 "registering atombios sysfs failed (%d).\n", r);
4041
d2f52ac8 4042 r = amdgpu_pm_sysfs_init(adev);
53e9d836
GC
4043 if (r)
4044 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
d2f52ac8 4045
5bb23532 4046 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
4047 if (r) {
4048 adev->ucode_sysfs_en = false;
5bb23532 4049 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
4050 } else
4051 adev->ucode_sysfs_en = true;
5bb23532 4052
b0adca4d
EQ
4053 /*
4054 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 4055 	 * Otherwise the mgpu fan boost feature will be skipped because the
 4056 	 * gpu instance count would be too low.
4057 */
4058 amdgpu_register_gpu_instance(adev);
4059
d38ceaf9
AD
4060 /* enable clockgating, etc. after ib tests, etc. since some blocks require
4061 * explicit gating rather than handling it automatically.
4062 */
e3c1b071 4063 if (!adev->gmc.xgmi.pending_reset) {
4064 r = amdgpu_device_ip_late_init(adev);
4065 if (r) {
4066 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4067 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 4068 goto release_ras_con;
e3c1b071 4069 }
4070 /* must succeed. */
4071 amdgpu_ras_resume(adev);
4072 queue_delayed_work(system_wq, &adev->delayed_init_work,
4073 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 4074 }
d38ceaf9 4075
38eecbe0
CL
4076 if (amdgpu_sriov_vf(adev)) {
4077 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4078 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4079 }
2c738637 4080
77f3a5cd 4081 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4082 if (r)
77f3a5cd 4083 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4084
d155bef0
AB
4085 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4086 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4087 if (r)
4088 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4089
c1dd4aa6
AG
4090 /* Have stored pci confspace at hand for restore in sudden PCI error */
4091 if (amdgpu_device_cache_pci_state(adev->pdev))
4092 pci_restore_state(pdev);
4093
8c3dd61c
KHF
4094 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4095 /* this will fail for cards that aren't VGA class devices, just
4096 * ignore it */
4097 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4098 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4099
d37a3929
OC
4100 px = amdgpu_device_supports_px(ddev);
4101
4102 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4103 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4104 vga_switcheroo_register_client(adev->pdev,
4105 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4106
4107 if (px)
8c3dd61c 4108 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4109
e3c1b071 4110 if (adev->gmc.xgmi.pending_reset)
4111 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4112 msecs_to_jiffies(AMDGPU_RESUME_MS));
4113
4a74c38c
PY
4114 amdgpu_device_check_iommu_direct_map(adev);
4115
d38ceaf9 4116 return 0;
83ba126a 4117
970fd197 4118release_ras_con:
38eecbe0
CL
4119 if (amdgpu_sriov_vf(adev))
4120 amdgpu_virt_release_full_gpu(adev, true);
4121
4122 /* failed in exclusive mode due to timeout */
4123 if (amdgpu_sriov_vf(adev) &&
4124 !amdgpu_sriov_runtime(adev) &&
4125 amdgpu_virt_mmio_blocked(adev) &&
4126 !amdgpu_virt_wait_reset(adev)) {
4127 dev_err(adev->dev, "VF exclusive mode timeout\n");
4128 /* Don't send request since VF is inactive. */
4129 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4130 adev->virt.ops = NULL;
4131 r = -EAGAIN;
4132 }
970fd197
SY
4133 amdgpu_release_ras_context(adev);
4134
83ba126a 4135failed:
89041940 4136 amdgpu_vf_error_trans_all(adev);
8840a387 4137
83ba126a 4138 return r;
d38ceaf9
AD
4139}
4140
07775fc1
AG
4141static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4142{
62d5f9f7 4143
07775fc1
AG
4144 /* Clear all CPU mappings pointing to this device */
4145 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4146
4147 /* Unmap all mapped bars - Doorbell, registers and VRAM */
4148 amdgpu_device_doorbell_fini(adev);
4149
4150 iounmap(adev->rmmio);
4151 adev->rmmio = NULL;
4152 if (adev->mman.aper_base_kaddr)
4153 iounmap(adev->mman.aper_base_kaddr);
4154 adev->mman.aper_base_kaddr = NULL;
4155
4156 /* Memory manager related */
a0ba1279 4157 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4158 arch_phys_wc_del(adev->gmc.vram_mtrr);
4159 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4160 }
4161}
4162
d38ceaf9 4163/**
bbe04dec 4164 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4165 *
4166 * @adev: amdgpu_device pointer
4167 *
4168 * Tear down the driver info (all asics).
4169 * Called at driver shutdown.
4170 */
72c8c97b 4171void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4172{
aac89168 4173 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4174 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4175 adev->shutdown = true;
9f875167 4176
752c683d
ML
 4177 	/* make sure IB tests have finished before entering exclusive mode
 4178 	 * to avoid preemption on IB tests
 4179 	 */
519b8b76 4180 if (amdgpu_sriov_vf(adev)) {
752c683d 4181 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4182 amdgpu_virt_fini_data_exchange(adev);
4183 }
752c683d 4184
e5b03032
ML
4185 /* disable all interrupts */
4186 amdgpu_irq_disable_all(adev);
47fc644f 4187 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4188 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4189 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4190 else
4a580877 4191 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4192 }
8d35a259 4193 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4194
cd3a8a59 4195 if (adev->mman.initialized)
9bff18d1 4196 drain_workqueue(adev->mman.bdev.wq);
98f56188 4197
53e9d836 4198 if (adev->pm.sysfs_initialized)
7c868b59 4199 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4200 if (adev->ucode_sysfs_en)
4201 amdgpu_ucode_sysfs_fini(adev);
4202 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4203
232d1d43
SY
4204 /* disable ras feature must before hw fini */
4205 amdgpu_ras_pre_fini(adev);
4206
e9669fb7 4207 amdgpu_device_ip_fini_early(adev);
d10d0daa 4208
a3848df6
YW
4209 amdgpu_irq_fini_hw(adev);
4210
b6fd6e0f
SK
4211 if (adev->mman.initialized)
4212 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4213
d10d0daa 4214 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4215
39934d3e
VP
4216 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4217 amdgpu_device_unmap_mmio(adev);
87172e89 4218
72c8c97b
AG
4219}
4220
4221void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4222{
62d5f9f7 4223 int idx;
d37a3929 4224 bool px;
62d5f9f7 4225
8d35a259 4226 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4227 amdgpu_device_ip_fini(adev);
b31d3063 4228 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4229 adev->accel_working = false;
68ce8b24 4230 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4231
4232 amdgpu_reset_fini(adev);
4233
d38ceaf9 4234 /* free i2c buses */
4562236b
HW
4235 if (!amdgpu_device_has_dc_support(adev))
4236 amdgpu_i2c_fini(adev);
bfca0289
SL
4237
4238 if (amdgpu_emu_mode != 1)
4239 amdgpu_atombios_fini(adev);
4240
d38ceaf9
AD
4241 kfree(adev->bios);
4242 adev->bios = NULL;
d37a3929
OC
4243
4244 px = amdgpu_device_supports_px(adev_to_drm(adev));
4245
4246 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4247 apple_gmux_detect(NULL, NULL)))
84c8b22e 4248 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4249
4250 if (px)
83ba126a 4251 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4252
38d6be81 4253 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4254 vga_client_unregister(adev->pdev);
e9bc1bf7 4255
62d5f9f7
LS
4256 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4257
4258 iounmap(adev->rmmio);
4259 adev->rmmio = NULL;
4260 amdgpu_device_doorbell_fini(adev);
4261 drm_dev_exit(idx);
4262 }
4263
d155bef0
AB
4264 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4265 amdgpu_pmu_fini(adev);
72de33f8 4266 if (adev->mman.discovery_bin)
a190d1c7 4267 amdgpu_discovery_fini(adev);
72c8c97b 4268
cfbb6b00
AG
4269 amdgpu_reset_put_reset_domain(adev->reset_domain);
4270 adev->reset_domain = NULL;
4271
72c8c97b
AG
4272 kfree(adev->pci_state);
4273
d38ceaf9
AD
4274}
4275
58144d28
ND
4276/**
4277 * amdgpu_device_evict_resources - evict device resources
4278 * @adev: amdgpu device object
4279 *
 4280 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4281 * of the vram memory type. Mainly used for evicting device resources
4282 * at suspend time.
4283 *
4284 */
7863c155 4285static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4286{
7863c155
ML
4287 int ret;
4288
e53d9665
ML
4289 /* No need to evict vram on APUs for suspend to ram or s2idle */
4290 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4291 return 0;
58144d28 4292
7863c155
ML
4293 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4294 if (ret)
58144d28 4295 DRM_WARN("evicting device resources failed\n");
7863c155 4296 return ret;
58144d28 4297}
d38ceaf9
AD
4298
4299/*
4300 * Suspend & resume.
4301 */
4302/**
810ddc3a 4303 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4304 *
87e3f136 4305 * @dev: drm dev pointer
87e3f136 4306 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4307 *
4308 * Puts the hw in the suspend state (all asics).
4309 * Returns 0 for success or an error on failure.
4310 * Called at driver suspend.
4311 */
de185019 4312int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4313{
a2e15b0e 4314 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4315 int r = 0;
d38ceaf9 4316
d38ceaf9
AD
4317 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4318 return 0;
4319
44779b43 4320 adev->in_suspend = true;
3fa8f89d 4321
47ea2076
SF
4322 /* Evict the majority of BOs before grabbing the full access */
4323 r = amdgpu_device_evict_resources(adev);
4324 if (r)
4325 return r;
4326
d7274ec7
BZ
4327 if (amdgpu_sriov_vf(adev)) {
4328 amdgpu_virt_fini_data_exchange(adev);
4329 r = amdgpu_virt_request_full_gpu(adev, false);
4330 if (r)
4331 return r;
4332 }
4333
3fa8f89d
S
4334 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4335 DRM_WARN("smart shift update failed\n");
4336
5f818173 4337 if (fbcon)
087451f3 4338 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4339
beff74bc 4340 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 4341
5e6932fe 4342 amdgpu_ras_suspend(adev);
4343
2196927b 4344 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4345
c004d44e 4346 if (!adev->in_s0ix)
5d3a2d95 4347 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4348
7863c155
ML
4349 r = amdgpu_device_evict_resources(adev);
4350 if (r)
4351 return r;
d38ceaf9 4352
8d35a259 4353 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4354
2196927b 4355 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4356
d7274ec7
BZ
4357 if (amdgpu_sriov_vf(adev))
4358 amdgpu_virt_release_full_gpu(adev, false);
4359
d38ceaf9
AD
4360 return 0;
4361}
4362
4363/**
810ddc3a 4364 * amdgpu_device_resume - initiate device resume
d38ceaf9 4365 *
87e3f136 4366 * @dev: drm dev pointer
87e3f136 4367 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4368 *
4369 * Bring the hw back to operating state (all asics).
4370 * Returns 0 for success or an error on failure.
4371 * Called at driver resume.
4372 */
de185019 4373int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4374{
1348969a 4375 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4376 int r = 0;
d38ceaf9 4377
d7274ec7
BZ
4378 if (amdgpu_sriov_vf(adev)) {
4379 r = amdgpu_virt_request_full_gpu(adev, true);
4380 if (r)
4381 return r;
4382 }
4383
d38ceaf9
AD
4384 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4385 return 0;
4386
62498733 4387 if (adev->in_s0ix)
bc143d8b 4388 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4389
d38ceaf9 4390 /* post card */
39c640c0 4391 if (amdgpu_device_need_post(adev)) {
4d2997ab 4392 r = amdgpu_device_asic_init(adev);
74b0b157 4393 if (r)
aac89168 4394 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4395 }
d38ceaf9 4396
06ec9070 4397 r = amdgpu_device_ip_resume(adev);
d7274ec7 4398
e6707218 4399 if (r) {
aac89168 4400 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4401 goto exit;
e6707218 4402 }
8d35a259 4403 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4404
06ec9070 4405 r = amdgpu_device_ip_late_init(adev);
03161a6e 4406 if (r)
3c22c1ea 4407 goto exit;
d38ceaf9 4408
beff74bc
AD
4409 queue_delayed_work(system_wq, &adev->delayed_init_work,
4410 msecs_to_jiffies(AMDGPU_RESUME_MS));
4411
c004d44e 4412 if (!adev->in_s0ix) {
5d3a2d95
AD
4413 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4414 if (r)
3c22c1ea 4415 goto exit;
5d3a2d95 4416 }
756e6880 4417
3c22c1ea
SF
4418exit:
4419 if (amdgpu_sriov_vf(adev)) {
4420 amdgpu_virt_init_data_exchange(adev);
4421 amdgpu_virt_release_full_gpu(adev, true);
4422 }
4423
4424 if (r)
4425 return r;
4426
96a5d8d4 4427 /* Make sure IB tests flushed */
beff74bc 4428 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4429
a2e15b0e 4430 if (fbcon)
087451f3 4431 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4432
5e6932fe 4433 amdgpu_ras_resume(adev);
4434
d09ef243
AD
4435 if (adev->mode_info.num_crtc) {
4436 /*
4437 * Most of the connector probing functions try to acquire runtime pm
4438 * refs to ensure that the GPU is powered on when connector polling is
4439 * performed. Since we're calling this from a runtime PM callback,
4440 * trying to acquire rpm refs will cause us to deadlock.
4441 *
4442 * Since we're guaranteed to be holding the rpm lock, it's safe to
4443 * temporarily disable the rpm helpers so this doesn't deadlock us.
4444 */
23a1a9e5 4445#ifdef CONFIG_PM
d09ef243 4446 dev->dev->power.disable_depth++;
23a1a9e5 4447#endif
d09ef243
AD
4448 if (!adev->dc_enabled)
4449 drm_helper_hpd_irq_event(dev);
4450 else
4451 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4452#ifdef CONFIG_PM
d09ef243 4453 dev->dev->power.disable_depth--;
23a1a9e5 4454#endif
d09ef243 4455 }
44779b43
RZ
4456 adev->in_suspend = false;
4457
dc907c9d
JX
4458 if (adev->enable_mes)
4459 amdgpu_mes_self_test(adev);
4460
3fa8f89d
S
4461 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4462 DRM_WARN("smart shift update failed\n");
4463
4d3b9ae5 4464 return 0;
d38ceaf9
AD
4465}
4466
e3ecdffa
AD
4467/**
4468 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4469 *
4470 * @adev: amdgpu_device pointer
4471 *
4472 * The list of all the hardware IPs that make up the asic is walked and
4473 * the check_soft_reset callbacks are run. check_soft_reset determines
4474 * if the asic is still hung or not.
4475 * Returns true if any of the IPs are still in a hung state, false if not.
4476 */
06ec9070 4477static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4478{
4479 int i;
4480 bool asic_hang = false;
4481
f993d628
ML
4482 if (amdgpu_sriov_vf(adev))
4483 return true;
4484
8bc04c29
AD
4485 if (amdgpu_asic_need_full_reset(adev))
4486 return true;
4487
63fbf42f 4488 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4489 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4490 continue;
a1255107
AD
4491 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4492 adev->ip_blocks[i].status.hang =
4493 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4494 if (adev->ip_blocks[i].status.hang) {
aac89168 4495 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4496 asic_hang = true;
4497 }
4498 }
4499 return asic_hang;
4500}
4501
e3ecdffa
AD
4502/**
4503 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4504 *
4505 * @adev: amdgpu_device pointer
4506 *
4507 * The list of all the hardware IPs that make up the asic is walked and the
4508 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4509 * handles any IP specific hardware or software state changes that are
4510 * necessary for a soft reset to succeed.
4511 * Returns 0 on success, negative error code on failure.
4512 */
06ec9070 4513static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4514{
4515 int i, r = 0;
4516
4517 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4518 if (!adev->ip_blocks[i].status.valid)
d31a501e 4519 continue;
a1255107
AD
4520 if (adev->ip_blocks[i].status.hang &&
4521 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4522 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4523 if (r)
4524 return r;
4525 }
4526 }
4527
4528 return 0;
4529}
4530
e3ecdffa
AD
4531/**
4532 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4533 *
4534 * @adev: amdgpu_device pointer
4535 *
4536 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4537 * reset is necessary to recover.
4538 * Returns true if a full asic reset is required, false if not.
4539 */
06ec9070 4540static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4541{
da146d3b
AD
4542 int i;
4543
8bc04c29
AD
4544 if (amdgpu_asic_need_full_reset(adev))
4545 return true;
4546
da146d3b 4547 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4548 if (!adev->ip_blocks[i].status.valid)
da146d3b 4549 continue;
a1255107
AD
4550 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4551 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4552 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4553 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4554 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4555 if (adev->ip_blocks[i].status.hang) {
aac89168 4556 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4557 return true;
4558 }
4559 }
35d782fe
CZ
4560 }
4561 return false;
4562}
4563
e3ecdffa
AD
4564/**
4565 * amdgpu_device_ip_soft_reset - do a soft reset
4566 *
4567 * @adev: amdgpu_device pointer
4568 *
4569 * The list of all the hardware IPs that make up the asic is walked and the
4570 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4571 * IP specific hardware or software state changes that are necessary to soft
4572 * reset the IP.
4573 * Returns 0 on success, negative error code on failure.
4574 */
06ec9070 4575static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4576{
4577 int i, r = 0;
4578
4579 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4580 if (!adev->ip_blocks[i].status.valid)
35d782fe 4581 continue;
a1255107
AD
4582 if (adev->ip_blocks[i].status.hang &&
4583 adev->ip_blocks[i].version->funcs->soft_reset) {
4584 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4585 if (r)
4586 return r;
4587 }
4588 }
4589
4590 return 0;
4591}
4592
e3ecdffa
AD
4593/**
4594 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4595 *
4596 * @adev: amdgpu_device pointer
4597 *
4598 * The list of all the hardware IPs that make up the asic is walked and the
4599 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4600 * handles any IP specific hardware or software state changes that are
4601 * necessary after the IP has been soft reset.
4602 * Returns 0 on success, negative error code on failure.
4603 */
06ec9070 4604static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4605{
4606 int i, r = 0;
4607
4608 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4609 if (!adev->ip_blocks[i].status.valid)
35d782fe 4610 continue;
a1255107
AD
4611 if (adev->ip_blocks[i].status.hang &&
4612 adev->ip_blocks[i].version->funcs->post_soft_reset)
4613 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4614 if (r)
4615 return r;
4616 }
4617
4618 return 0;
4619}
4620
e3ecdffa 4621/**
c33adbc7 4622 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4623 *
4624 * @adev: amdgpu_device pointer
4625 *
4626 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4627 * restore things like GPUVM page tables after a GPU reset where
4628 * the contents of VRAM might be lost.
403009bf
CK
4629 *
4630 * Returns:
4631 * 0 on success, negative error code on failure.
e3ecdffa 4632 */
c33adbc7 4633static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4634{
c41d1cf6 4635 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4636 struct amdgpu_bo *shadow;
e18aaea7 4637 struct amdgpu_bo_vm *vmbo;
403009bf 4638 long r = 1, tmo;
c41d1cf6
ML
4639
4640 if (amdgpu_sriov_runtime(adev))
b045d3af 4641 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4642 else
4643 tmo = msecs_to_jiffies(100);
4644
aac89168 4645 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4646 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4647 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4648 /* If vm is compute context or adev is APU, shadow will be NULL */
4649 if (!vmbo->shadow)
4650 continue;
4651 shadow = vmbo->shadow;
4652
403009bf 4653 /* No need to recover an evicted BO */
d3116756
CK
4654 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4655 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4656 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4657 continue;
4658
4659 r = amdgpu_bo_restore_shadow(shadow, &next);
4660 if (r)
4661 break;
4662
c41d1cf6 4663 if (fence) {
1712fb1a 4664 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4665 dma_fence_put(fence);
4666 fence = next;
1712fb1a 4667 if (tmo == 0) {
4668 r = -ETIMEDOUT;
c41d1cf6 4669 break;
1712fb1a 4670 } else if (tmo < 0) {
4671 r = tmo;
4672 break;
4673 }
403009bf
CK
4674 } else {
4675 fence = next;
c41d1cf6 4676 }
c41d1cf6
ML
4677 }
4678 mutex_unlock(&adev->shadow_list_lock);
4679
403009bf
CK
4680 if (fence)
4681 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4682 dma_fence_put(fence);
4683
1712fb1a 4684 if (r < 0 || tmo <= 0) {
aac89168 4685 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4686 return -EIO;
4687 }
c41d1cf6 4688
aac89168 4689 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4690 return 0;
c41d1cf6
ML
4691}
4692
a90ad3c2 4693
e3ecdffa 4694/**
06ec9070 4695 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4696 *
982a820b 4697 * @adev: amdgpu_device pointer
87e3f136 4698 * @from_hypervisor: request from hypervisor
5740682e
ML
4699 *
 4700 * Do VF FLR and reinitialize the ASIC.
 3f48c681 4701 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
4702 */
4703static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4704 bool from_hypervisor)
5740682e
ML
4705{
4706 int r;
a5f67c93 4707 struct amdgpu_hive_info *hive = NULL;
7258fa31 4708 int retry_limit = 0;
5740682e 4709
7258fa31 4710retry:
c004d44e 4711 amdgpu_amdkfd_pre_reset(adev);
428890a3 4712
5740682e
ML
4713 if (from_hypervisor)
4714 r = amdgpu_virt_request_full_gpu(adev, true);
4715 else
4716 r = amdgpu_virt_reset_gpu(adev);
4717 if (r)
4718 return r;
a90ad3c2
ML
4719
4720 /* Resume IP prior to SMC */
06ec9070 4721 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4722 if (r)
4723 goto error;
a90ad3c2 4724
c9ffa427 4725 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4726
7a3e0bb2
RZ
4727 r = amdgpu_device_fw_loading(adev);
4728 if (r)
4729 return r;
4730
a90ad3c2 4731 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4732 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4733 if (r)
4734 goto error;
a90ad3c2 4735
a5f67c93
ZL
4736 hive = amdgpu_get_xgmi_hive(adev);
4737 /* Update PSP FW topology after reset */
4738 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4739 r = amdgpu_xgmi_update_topology(hive, adev);
4740
4741 if (hive)
4742 amdgpu_put_xgmi_hive(hive);
4743
4744 if (!r) {
4745 amdgpu_irq_gpu_reset_resume_helper(adev);
4746 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4747
c004d44e 4748 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4749 }
a90ad3c2 4750
abc34253 4751error:
c41d1cf6 4752 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4753 amdgpu_inc_vram_lost(adev);
c33adbc7 4754 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4755 }
437f3e0b 4756 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4757
7258fa31
SK
4758 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4759 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4760 retry_limit++;
4761 goto retry;
4762 } else
4763 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4764 }
4765
a90ad3c2
ML
4766 return r;
4767}
4768
9a1cddd6 4769/**
4770 * amdgpu_device_has_job_running - check if there is any job in mirror list
4771 *
982a820b 4772 * @adev: amdgpu_device pointer
9a1cddd6 4773 *
4774 * check if there is any job in mirror list
4775 */
4776bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4777{
4778 int i;
4779 struct drm_sched_job *job;
4780
4781 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4782 struct amdgpu_ring *ring = adev->rings[i];
4783
4784 if (!ring || !ring->sched.thread)
4785 continue;
4786
4787 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4788 job = list_first_entry_or_null(&ring->sched.pending_list,
4789 struct drm_sched_job, list);
9a1cddd6 4790 spin_unlock(&ring->sched.job_list_lock);
4791 if (job)
4792 return true;
4793 }
4794 return false;
4795}
4796
12938fad
CK
4797/**
4798 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4799 *
982a820b 4800 * @adev: amdgpu_device pointer
12938fad
CK
4801 *
4802 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4803 * a hung GPU.
4804 */
4805bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4806{
12938fad 4807
3ba7b418
AG
4808 if (amdgpu_gpu_recovery == 0)
4809 goto disabled;
4810
1a11a65d
YC
4811 /* Skip soft reset check in fatal error mode */
4812 if (!amdgpu_ras_is_poison_mode_supported(adev))
4813 return true;
4814
3ba7b418
AG
4815 if (amdgpu_sriov_vf(adev))
4816 return true;
4817
4818 if (amdgpu_gpu_recovery == -1) {
4819 switch (adev->asic_type) {
b3523c45
AD
4820#ifdef CONFIG_DRM_AMDGPU_SI
4821 case CHIP_VERDE:
4822 case CHIP_TAHITI:
4823 case CHIP_PITCAIRN:
4824 case CHIP_OLAND:
4825 case CHIP_HAINAN:
4826#endif
4827#ifdef CONFIG_DRM_AMDGPU_CIK
4828 case CHIP_KAVERI:
4829 case CHIP_KABINI:
4830 case CHIP_MULLINS:
4831#endif
4832 case CHIP_CARRIZO:
4833 case CHIP_STONEY:
4834 case CHIP_CYAN_SKILLFISH:
3ba7b418 4835 goto disabled;
b3523c45
AD
4836 default:
4837 break;
3ba7b418 4838 }
12938fad
CK
4839 }
4840
4841 return true;
3ba7b418
AG
4842
4843disabled:
aac89168 4844 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4845 return false;
12938fad
CK
4846}
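/*
 * Illustrative (a sketch, assuming amdgpu_gpu_recovery is exposed as the
 * "gpu_recovery" module parameter): gpu_recovery=0 disables recovery
 * outright, gpu_recovery=1 always attempts it, and the default -1 follows
 * the per-ASIC policy above (always recover for SR-IOV VFs, skip the
 * listed SI/CIK/APU parts).
 */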
4847
5c03e584
FX
4848int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4849{
47fc644f
SS
4850 u32 i;
4851 int ret = 0;
5c03e584 4852
47fc644f 4853 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4854
47fc644f 4855 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4856
47fc644f
SS
4857 /* disable BM */
4858 pci_clear_master(adev->pdev);
5c03e584 4859
47fc644f 4860 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4861
47fc644f
SS
4862 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4863 dev_info(adev->dev, "GPU smu mode1 reset\n");
4864 ret = amdgpu_dpm_mode1_reset(adev);
4865 } else {
4866 dev_info(adev->dev, "GPU psp mode1 reset\n");
4867 ret = psp_gpu_reset(adev);
4868 }
5c03e584 4869
47fc644f
SS
4870 if (ret)
4871 dev_err(adev->dev, "GPU mode1 reset failed\n");
5c03e584 4872
47fc644f 4873 amdgpu_device_load_pci_state(adev->pdev);
5c03e584 4874
47fc644f
SS
4875 /* wait for asic to come out of reset */
4876 for (i = 0; i < adev->usec_timeout; i++) {
4877 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4878
47fc644f
SS
4879 if (memsize != 0xffffffff)
4880 break;
4881 udelay(1);
4882 }
5c03e584 4883
47fc644f
SS
4884 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4885 return ret;
5c03e584 4886}
5c6dd71e 4887
e3c1b071 4888int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4889 struct amdgpu_reset_context *reset_context)
26bc5340 4890{
5c1e6fa4 4891 int i, r = 0;
04442bf7
LL
4892 struct amdgpu_job *job = NULL;
4893 bool need_full_reset =
4894 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4895
4896 if (reset_context->reset_req_dev == adev)
4897 job = reset_context->job;
71182665 4898
b602ca5f
TZ
4899 if (amdgpu_sriov_vf(adev)) {
4900 /* stop the data exchange thread */
4901 amdgpu_virt_fini_data_exchange(adev);
4902 }
4903
9e225fb9
AG
4904 amdgpu_fence_driver_isr_toggle(adev, true);
4905
71182665 4906 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4907 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4908 struct amdgpu_ring *ring = adev->rings[i];
4909
51687759 4910 if (!ring || !ring->sched.thread)
0875dc9e 4911 continue;
5740682e 4912
c530b02f
JZ
 4913 		/* Clear job fences from fence drv to avoid force_completion
 4914 		 * leaving NULL and vm flush fences in fence drv */
5c1e6fa4 4915 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4916
2f9d4084
ML
4917 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4918 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4919 }
d38ceaf9 4920
9e225fb9
AG
4921 amdgpu_fence_driver_isr_toggle(adev, false);
4922
ff99849b 4923 if (job && job->vm)
222b5f04
AG
4924 drm_sched_increase_karma(&job->base);
4925
04442bf7 4926 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b
LL
4927 /* If reset handler not implemented, continue; otherwise return */
4928 if (r == -ENOSYS)
4929 r = 0;
4930 else
04442bf7
LL
4931 return r;
4932
1d721ed6 4933 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4934 if (!amdgpu_sriov_vf(adev)) {
4935
4936 if (!need_full_reset)
4937 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4938
360cd081
LG
4939 if (!need_full_reset && amdgpu_gpu_recovery &&
4940 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4941 amdgpu_device_ip_pre_soft_reset(adev);
4942 r = amdgpu_device_ip_soft_reset(adev);
4943 amdgpu_device_ip_post_soft_reset(adev);
4944 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4945 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4946 need_full_reset = true;
4947 }
4948 }
4949
4950 if (need_full_reset)
4951 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4952 if (need_full_reset)
4953 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4954 else
4955 clear_bit(AMDGPU_NEED_FULL_RESET,
4956 &reset_context->flags);
26bc5340
AG
4957 }
4958
4959 return r;
4960}
4961
15fd09a0
SA
4962static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4963{
15fd09a0
SA
4964 int i;
4965
38a15ad9 4966 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4967
4968 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4969 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4970 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4971 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4972 }
4973
4974 return 0;
4975}
4976
3d8785f6
SA
4977#ifdef CONFIG_DEV_COREDUMP
4978static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4979 size_t count, void *data, size_t datalen)
4980{
4981 struct drm_printer p;
4982 struct amdgpu_device *adev = data;
4983 struct drm_print_iterator iter;
4984 int i;
4985
4986 iter.data = buffer;
4987 iter.offset = 0;
4988 iter.start = offset;
4989 iter.remain = count;
4990
4991 p = drm_coredump_printer(&iter);
4992
4993 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4994 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4995 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4996 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4997 if (adev->reset_task_info.pid)
4998 drm_printf(&p, "process_name: %s PID: %d\n",
4999 adev->reset_task_info.process_name,
5000 adev->reset_task_info.pid);
5001
5002 if (adev->reset_vram_lost)
5003 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
5004 if (adev->num_regs) {
5005 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
5006
5007 for (i = 0; i < adev->num_regs; i++)
5008 drm_printf(&p, "0x%08x: 0x%08x\n",
5009 adev->reset_dump_reg_list[i],
5010 adev->reset_dump_reg_value[i]);
5011 }
5012
5013 return count - iter.remain;
5014}
5015
5016static void amdgpu_devcoredump_free(void *data)
5017{
5018}
5019
5020static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
5021{
5022 struct drm_device *dev = adev_to_drm(adev);
5023
5024 ktime_get_ts64(&adev->reset_time);
5025 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
5026 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
5027}
5028#endif
5029
04442bf7
LL
5030int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5031 struct amdgpu_reset_context *reset_context)
26bc5340
AG
5032{
5033 struct amdgpu_device *tmp_adev = NULL;
04442bf7 5034 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 5035 int r = 0;
f5c7e779 5036 bool gpu_reset_for_dev_remove = 0;
26bc5340 5037
04442bf7
LL
5038 /* Try reset handler method first */
5039 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5040 reset_list);
15fd09a0 5041 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
5042
5043 reset_context->reset_device_list = device_list_handle;
04442bf7 5044 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b
LL
5045 /* If reset handler not implemented, continue; otherwise return */
5046 if (r == -ENOSYS)
5047 r = 0;
5048 else
04442bf7
LL
5049 return r;
5050
5051 /* Reset handler not implemented, use the default method */
5052 need_full_reset =
5053 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5054 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5055
f5c7e779
YC
5056 gpu_reset_for_dev_remove =
5057 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5058 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5059
26bc5340 5060 /*
655ce9cb 5061 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
 5062 * to allow proper link negotiation in FW (within 1 sec)
5063 */
7ac71382 5064 if (!skip_hw_reset && need_full_reset) {
655ce9cb 5065 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 5066 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 5067 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 5068 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 5069 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
5070 r = -EALREADY;
5071 } else
5072 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 5073
041a62bc 5074 if (r) {
aac89168 5075 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 5076 r, adev_to_drm(tmp_adev)->unique);
041a62bc 5077 break;
ce316fa5
LM
5078 }
5079 }
5080
041a62bc
AG
5081 /* For XGMI wait for all resets to complete before proceed */
5082 if (!r) {
655ce9cb 5083 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
5084 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5085 flush_work(&tmp_adev->xgmi_reset_work);
5086 r = tmp_adev->asic_reset_res;
5087 if (r)
5088 break;
ce316fa5
LM
5089 }
5090 }
5091 }
ce316fa5 5092 }
26bc5340 5093
43c4d576 5094 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 5095 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 5096 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
5097 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
5098 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
5099 }
5100
00eaa571 5101 amdgpu_ras_intr_cleared();
43c4d576 5102 }
00eaa571 5103
f5c7e779
YC
5104 /* Since the mode1 reset affects base ip blocks, the
5105 * phase1 ip blocks need to be resumed. Otherwise there
5106 * will be a BIOS signature error and the psp bootloader
5107 * can't load kdb on the next amdgpu install.
5108 */
5109 if (gpu_reset_for_dev_remove) {
5110 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5111 amdgpu_device_ip_resume_phase1(tmp_adev);
5112
5113 goto end;
5114 }
5115
655ce9cb 5116 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
5117 if (need_full_reset) {
5118 /* post card */
e3c1b071 5119 r = amdgpu_device_asic_init(tmp_adev);
5120 if (r) {
aac89168 5121 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5122 } else {
26bc5340 5123 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1
JZ
5124 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
5125 if (r)
5126 goto out;
5127
26bc5340
AG
5128 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5129 if (r)
5130 goto out;
5131
5132 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
5133#ifdef CONFIG_DEV_COREDUMP
5134 tmp_adev->reset_vram_lost = vram_lost;
5135 memset(&tmp_adev->reset_task_info, 0,
5136 sizeof(tmp_adev->reset_task_info));
5137 if (reset_context->job && reset_context->job->vm)
5138 tmp_adev->reset_task_info =
5139 reset_context->job->vm->task_info;
5140 amdgpu_reset_capture_coredumpm(tmp_adev);
5141#endif
26bc5340 5142 if (vram_lost) {
77e7f829 5143 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5144 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5145 }
5146
26bc5340
AG
5147 r = amdgpu_device_fw_loading(tmp_adev);
5148 if (r)
5149 return r;
5150
5151 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5152 if (r)
5153 goto out;
5154
5155 if (vram_lost)
5156 amdgpu_device_fill_reset_magic(tmp_adev);
5157
fdafb359
EQ
5158 /*
 5159 * Add this ASIC back as tracked now that the reset
 5160 * has completed successfully.
5161 */
5162 amdgpu_register_gpu_instance(tmp_adev);
5163
04442bf7
LL
5164 if (!reset_context->hive &&
5165 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5166 amdgpu_xgmi_add_device(tmp_adev);
5167
7c04ca50 5168 r = amdgpu_device_ip_late_init(tmp_adev);
5169 if (r)
5170 goto out;
5171
087451f3 5172 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5173
e8fbaf03
GC
5174 /*
 5175 * The GPU enters a bad state once the number of faulty
 5176 * pages reported by ECC reaches the threshold, and RAS
 5177 * recovery is scheduled next. So add one check here
 5178 * to break recovery if the bad page threshold is
 5179 * indeed exceeded, and remind the user to either
 5180 * retire this GPU or set a bigger bad_page_threshold
 5181 * value to fix this the next time the driver is
 5182 * probed.
5183 */
11003c68 5184 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5185 /* must succeed. */
5186 amdgpu_ras_resume(tmp_adev);
5187 } else {
5188 r = -EINVAL;
5189 goto out;
5190 }
e79a04d5 5191
26bc5340 5192 /* Update PSP FW topology after reset */
04442bf7
LL
5193 if (reset_context->hive &&
5194 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5195 r = amdgpu_xgmi_update_topology(
5196 reset_context->hive, tmp_adev);
26bc5340
AG
5197 }
5198 }
5199
26bc5340
AG
5200out:
5201 if (!r) {
5202 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5203 r = amdgpu_ib_ring_tests(tmp_adev);
5204 if (r) {
5205 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5206 need_full_reset = true;
5207 r = -EAGAIN;
5208 goto end;
5209 }
5210 }
5211
5212 if (!r)
5213 r = amdgpu_device_recover_vram(tmp_adev);
5214 else
5215 tmp_adev->asic_reset_res = r;
5216 }
5217
5218end:
04442bf7
LL
5219 if (need_full_reset)
5220 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5221 else
5222 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5223 return r;
5224}
5225
e923be99 5226static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5227{
5740682e 5228
a3a09142
AD
5229 switch (amdgpu_asic_reset_method(adev)) {
5230 case AMD_RESET_METHOD_MODE1:
5231 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5232 break;
5233 case AMD_RESET_METHOD_MODE2:
5234 adev->mp1_state = PP_MP1_STATE_RESET;
5235 break;
5236 default:
5237 adev->mp1_state = PP_MP1_STATE_NONE;
5238 break;
5239 }
26bc5340 5240}
d38ceaf9 5241
e923be99 5242static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5243{
89041940 5244 amdgpu_vf_error_trans_all(adev);
a3a09142 5245 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5246}
5247
3f12acc8
EQ
5248static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5249{
5250 struct pci_dev *p = NULL;
5251
5252 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5253 adev->pdev->bus->number, 1);
5254 if (p) {
5255 pm_runtime_enable(&(p->dev));
5256 pm_runtime_resume(&(p->dev));
5257 }
b85e285e
YY
5258
5259 pci_dev_put(p);
3f12acc8
EQ
5260}
5261
5262static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5263{
5264 enum amd_reset_method reset_method;
5265 struct pci_dev *p = NULL;
5266 u64 expires;
5267
5268 /*
5269 * For now, only BACO and mode1 reset are confirmed
 5270 * to suffer the audio issue if not properly suspended.
5271 */
5272 reset_method = amdgpu_asic_reset_method(adev);
5273 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5274 (reset_method != AMD_RESET_METHOD_MODE1))
5275 return -EINVAL;
5276
5277 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5278 adev->pdev->bus->number, 1);
5279 if (!p)
5280 return -ENODEV;
5281
5282 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5283 if (!expires)
5284 /*
5285 * If we cannot get the audio device autosuspend delay,
 5286 * a fixed 4s interval will be used. Since 3s is the
 5287 * audio controller's default autosuspend delay setting,
 5288 * the 4s used here is guaranteed to cover it.
5289 */
54b7feb9 5290 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5291
5292 while (!pm_runtime_status_suspended(&(p->dev))) {
5293 if (!pm_runtime_suspend(&(p->dev)))
5294 break;
5295
5296 if (expires < ktime_get_mono_fast_ns()) {
5297 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5298 pci_dev_put(p);
3f12acc8
EQ
5299 /* TODO: abort the succeeding gpu reset? */
5300 return -ETIMEDOUT;
5301 }
5302 }
5303
5304 pm_runtime_disable(&(p->dev));
5305
b85e285e 5306 pci_dev_put(p);
3f12acc8
EQ
5307 return 0;
5308}
5309
d193b12b 5310static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5311{
5312 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5313
5314#if defined(CONFIG_DEBUG_FS)
5315 if (!amdgpu_sriov_vf(adev))
5316 cancel_work(&adev->reset_work);
5317#endif
5318
5319 if (adev->kfd.dev)
5320 cancel_work(&adev->kfd.reset_work);
5321
5322 if (amdgpu_sriov_vf(adev))
5323 cancel_work(&adev->virt.flr_work);
5324
5325 if (con && adev->ras_enabled)
5326 cancel_work(&con->recovery_work);
5327
5328}
5329
26bc5340 5330/**
6e9c65f7 5331 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5332 *
982a820b 5333 * @adev: amdgpu_device pointer
26bc5340 5334 * @job: which job triggered the hang
80bd2de1 5335 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5336 *
5337 * Attempt to reset the GPU if it has hung (all asics).
 5338 * Attempt to do a soft-reset or full-reset and reinitialize the ASIC.
5339 * Returns 0 for success or an error on failure.
5340 */
5341
cf727044 5342int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5343 struct amdgpu_job *job,
5344 struct amdgpu_reset_context *reset_context)
26bc5340 5345{
1d721ed6 5346 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5347 bool job_signaled = false;
26bc5340 5348 struct amdgpu_hive_info *hive = NULL;
26bc5340 5349 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5350 int i, r = 0;
bb5c7235 5351 bool need_emergency_restart = false;
3f12acc8 5352 bool audio_suspended = false;
f5c7e779
YC
5353 bool gpu_reset_for_dev_remove = false;
5354
5355 gpu_reset_for_dev_remove =
5356 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5357 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5358
6e3cd2a9 5359 /*
bb5c7235
WS
5360 * Special case: RAS triggered and full reset isn't supported
5361 */
5362 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5363
d5ea093e
AG
5364 /*
5365 * Flush RAM to disk so that after reboot
5366 * the user can read log and see why the system rebooted.
5367 */
bb5c7235 5368 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5369 DRM_WARN("Emergency reboot.");
5370
5371 ksys_sync_helper();
5372 emergency_restart();
5373 }
5374
b823821f 5375 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5376 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5377
175ac6ec
ZL
5378 if (!amdgpu_sriov_vf(adev))
5379 hive = amdgpu_get_xgmi_hive(adev);
681260df 5380 if (hive)
53b3f8f4 5381 mutex_lock(&hive->hive_lock);
26bc5340 5382
f1549c09
LG
5383 reset_context->job = job;
5384 reset_context->hive = hive;
9e94d22c
EQ
5385 /*
5386 * Build list of devices to reset.
5387 * In case we are in XGMI hive mode, resort the device list
5388 * to put adev in the 1st position.
5389 */
5390 INIT_LIST_HEAD(&device_list);
175ac6ec 5391 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5392 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5393 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5394 if (gpu_reset_for_dev_remove && adev->shutdown)
5395 tmp_adev->shutdown = true;
5396 }
655ce9cb 5397 if (!list_is_first(&adev->reset_list, &device_list))
5398 list_rotate_to_front(&adev->reset_list, &device_list);
5399 device_list_handle = &device_list;
26bc5340 5400 } else {
655ce9cb 5401 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5402 device_list_handle = &device_list;
5403 }
5404
e923be99
AG
 5405 /* We need to lock the reset domain only once, for both XGMI and single device */
5406 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5407 reset_list);
3675c2f2 5408 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5409
1d721ed6 5410 /* block all schedulers and reset given job's ring */
655ce9cb 5411 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5412
e923be99 5413 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5414
3f12acc8
EQ
5415 /*
5416 * Try to put the audio codec into suspend state
 5417 * before the GPU reset starts.
 5418 *
 5419 * Because the power domain of the graphics device
 5420 * is shared with the AZ power domain, without this
 5421 * we may change the audio hardware from behind
 5422 * the audio driver's back. That will trigger
 5423 * some audio codec errors.
5424 */
5425 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5426 audio_suspended = true;
5427
9e94d22c
EQ
5428 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5429
52fb44cf
EQ
5430 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5431
c004d44e 5432 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5433 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5434
12ffa55d
AG
5435 /*
 5436 * Mark these ASICs to be reset as untracked first,
 5437 * and add them back after the reset completes
5438 */
5439 amdgpu_unregister_gpu_instance(tmp_adev);
5440
163d4cd2 5441 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5442
f1c1314b 5443 /* disable ras on ALL IPs */
bb5c7235 5444 if (!need_emergency_restart &&
b823821f 5445 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5446 amdgpu_ras_suspend(tmp_adev);
5447
1d721ed6
AG
5448 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5449 struct amdgpu_ring *ring = tmp_adev->rings[i];
5450
5451 if (!ring || !ring->sched.thread)
5452 continue;
5453
0b2d2c2e 5454 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5455
bb5c7235 5456 if (need_emergency_restart)
7c6e68c7 5457 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5458 }
8f8c80f4 5459 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5460 }
5461
bb5c7235 5462 if (need_emergency_restart)
7c6e68c7
AG
5463 goto skip_sched_resume;
5464
1d721ed6
AG
5465 /*
 5466 * Must check whether the guilty job has already signaled here, since
 5467 * after this point all old HW fences are force signaled.
5468 *
5469 * job->base holds a reference to parent fence
5470 */
f6a3f660 5471 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5472 job_signaled = true;
1d721ed6
AG
5473 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5474 goto skip_hw_reset;
5475 }
5476
26bc5340 5477retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5478 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5479 if (gpu_reset_for_dev_remove) {
 5480 /* Workaround for ASICs that need to disable SMC first */
5481 amdgpu_device_smu_fini_early(tmp_adev);
5482 }
f1549c09 5483 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
 5484 /* TODO: Should we stop? */
5485 if (r) {
aac89168 5486 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5487 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5488 tmp_adev->asic_reset_res = r;
5489 }
247c7b0d
AG
5490
5491 /*
5492 * Drop all pending non scheduler resets. Scheduler resets
5493 * were already dropped during drm_sched_stop
5494 */
d193b12b 5495 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5496 }
5497
5498 /* Actual ASIC resets if needed.*/
4f30d920 5499 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5500 if (amdgpu_sriov_vf(adev)) {
5501 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5502 if (r)
5503 adev->asic_reset_res = r;
950d6425 5504
28606c4e
YC
 5505 /* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so RAS needs to be resumed during reset */
5506 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
5507 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
950d6425 5508 amdgpu_ras_resume(adev);
26bc5340 5509 } else {
f1549c09 5510 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5511 if (r && r == -EAGAIN)
26bc5340 5512 goto retry;
f5c7e779
YC
5513
5514 if (!r && gpu_reset_for_dev_remove)
5515 goto recover_end;
26bc5340
AG
5516 }
5517
1d721ed6
AG
5518skip_hw_reset:
5519
26bc5340 5520 /* Post ASIC reset for all devs. */
655ce9cb 5521 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5522
1d721ed6
AG
5523 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5524 struct amdgpu_ring *ring = tmp_adev->rings[i];
5525
5526 if (!ring || !ring->sched.thread)
5527 continue;
5528
6868a2c4 5529 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5530 }
5531
693073a0 5532 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
ed67f729
JX
5533 amdgpu_mes_self_test(tmp_adev);
5534
1053b9c9 5535 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
4a580877 5536 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
5537 }
5538
7258fa31
SK
5539 if (tmp_adev->asic_reset_res)
5540 r = tmp_adev->asic_reset_res;
5541
1d721ed6 5542 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5543
5544 if (r) {
5545 /* bad news, how to tell it to userspace ? */
12ffa55d 5546 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5547 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5548 } else {
12ffa55d 5549 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5550 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5551 DRM_WARN("smart shift update failed\n");
26bc5340 5552 }
7c6e68c7 5553 }
26bc5340 5554
7c6e68c7 5555skip_sched_resume:
655ce9cb 5556 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5557 /* unlock kfd: SRIOV would do it separately */
c004d44e 5558 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5559 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5560
5561 /* kfd_post_reset will do nothing if kfd device is not initialized,
 5562 * need to bring up kfd here if it was not initialized before
5563 */
5564 if (!adev->kfd.init_complete)
5565 amdgpu_amdkfd_device_init(adev);
5566
3f12acc8
EQ
5567 if (audio_suspended)
5568 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5569
5570 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5571
5572 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5573 }
5574
f5c7e779 5575recover_end:
e923be99
AG
5576 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5577 reset_list);
5578 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5579
9e94d22c 5580 if (hive) {
9e94d22c 5581 mutex_unlock(&hive->hive_lock);
d95e8e97 5582 amdgpu_put_xgmi_hive(hive);
9e94d22c 5583 }
26bc5340 5584
f287a3c5 5585 if (r)
26bc5340 5586 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5587
5588 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5589 return r;
5590}
5591
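
/*
 * Illustrative sketch (not part of the original file): how a caller such as
 * a job-timeout handler would typically fill an amdgpu_reset_context before
 * handing a hung job to amdgpu_device_gpu_recover(). The function name below
 * is hypothetical; only the context fields and flags shown are taken from
 * the code above.
 */
static void amdgpu_example_recover_from_timeout(struct amdgpu_device *adev,
						struct amdgpu_job *job)
{
	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));
	reset_context.method = AMD_RESET_METHOD_NONE;	/* let the driver pick */
	reset_context.reset_req_dev = adev;
	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	/* Blocks until the ASIC has been reset and the schedulers restarted */
	amdgpu_device_gpu_recover(adev, job, &reset_context);
}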
e3ecdffa
AD
5592/**
 5593 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5594 *
5595 * @adev: amdgpu_device pointer
5596 *
 5597 * Fetches and stores in the driver the PCIE capabilities (gen speed
5598 * and lanes) of the slot the device is in. Handles APUs and
5599 * virtualized environments where PCIE config space may not be available.
5600 */
5494d864 5601static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5602{
5d9a6330 5603 struct pci_dev *pdev;
c5313457
HK
5604 enum pci_bus_speed speed_cap, platform_speed_cap;
5605 enum pcie_link_width platform_link_width;
d0dd7f0c 5606
cd474ba0
AD
5607 if (amdgpu_pcie_gen_cap)
5608 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5609
cd474ba0
AD
5610 if (amdgpu_pcie_lane_cap)
5611 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5612
cd474ba0 5613 /* covers APUs as well */
04e85958 5614 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5615 if (adev->pm.pcie_gen_mask == 0)
5616 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5617 if (adev->pm.pcie_mlw_mask == 0)
5618 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5619 return;
cd474ba0 5620 }
d0dd7f0c 5621
c5313457
HK
5622 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5623 return;
5624
dbaa922b
AD
5625 pcie_bandwidth_available(adev->pdev, NULL,
5626 &platform_speed_cap, &platform_link_width);
c5313457 5627
cd474ba0 5628 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5629 /* asic caps */
5630 pdev = adev->pdev;
5631 speed_cap = pcie_get_speed_cap(pdev);
5632 if (speed_cap == PCI_SPEED_UNKNOWN) {
5633 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5634 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5635 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5636 } else {
2b3a1f51
FX
5637 if (speed_cap == PCIE_SPEED_32_0GT)
5638 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5639 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5640 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5641 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5642 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5643 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5644 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5645 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5646 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5647 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5648 else if (speed_cap == PCIE_SPEED_8_0GT)
5649 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5650 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5651 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5652 else if (speed_cap == PCIE_SPEED_5_0GT)
5653 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5654 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5655 else
5656 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5657 }
5658 /* platform caps */
c5313457 5659 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5660 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5661 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5662 } else {
2b3a1f51
FX
5663 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5664 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5665 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5666 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5667 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5668 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5669 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5670 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5671 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5672 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5673 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5674 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5675 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5676 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5677 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5678 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5679 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5680 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5681 else
5682 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5683
cd474ba0
AD
5684 }
5685 }
5686 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5687 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5688 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5689 } else {
c5313457 5690 switch (platform_link_width) {
5d9a6330 5691 case PCIE_LNK_X32:
cd474ba0
AD
5692 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5693 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5694 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5695 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5696 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5697 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5698 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5699 break;
5d9a6330 5700 case PCIE_LNK_X16:
cd474ba0
AD
5701 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5702 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5703 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5704 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5705 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5706 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5707 break;
5d9a6330 5708 case PCIE_LNK_X12:
cd474ba0
AD
5709 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5710 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5711 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5712 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5713 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5714 break;
5d9a6330 5715 case PCIE_LNK_X8:
cd474ba0
AD
5716 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5717 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5718 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5719 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5720 break;
5d9a6330 5721 case PCIE_LNK_X4:
cd474ba0
AD
5722 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5723 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5724 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5725 break;
5d9a6330 5726 case PCIE_LNK_X2:
cd474ba0
AD
5727 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5728 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5729 break;
5d9a6330 5730 case PCIE_LNK_X1:
cd474ba0
AD
5731 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5732 break;
5733 default:
5734 break;
5735 }
d0dd7f0c
AD
5736 }
5737 }
5738}
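
/*
 * Illustrative sketch (hypothetical helper, not part of the original file):
 * how the CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GENn bits collected above could
 * be folded into a single "highest supported gen" number by a caller.
 */
static unsigned int amdgpu_example_max_pcie_gen(struct amdgpu_device *adev)
{
	u32 mask = adev->pm.pcie_gen_mask;

	if (mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5)
		return 5;
	if (mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4)
		return 4;
	if (mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3)
		return 3;
	if (mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2)
		return 2;
	return 1;
}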
d38ceaf9 5739
08a2fd23
RE
5740/**
5741 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5742 *
5743 * @adev: amdgpu_device pointer
5744 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5745 *
5746 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5747 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5748 * @peer_adev.
5749 */
5750bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5751 struct amdgpu_device *peer_adev)
5752{
5753#ifdef CONFIG_HSA_AMD_P2P
5754 uint64_t address_mask = peer_adev->dev->dma_mask ?
5755 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5756 resource_size_t aper_limit =
5757 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5758 bool p2p_access =
5759 !adev->gmc.xgmi.connected_to_cpu &&
5760 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5761
5762 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5763 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5764 !(adev->gmc.aper_base & address_mask ||
5765 aper_limit & address_mask));
5766#else
5767 return false;
5768#endif
5769}
5770
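/*
 * Illustrative sketch (hypothetical helper): peer-to-peer DMA is normally
 * only enabled when each device can reach the other one's BAR, so the check
 * above would typically be evaluated in both directions.
 */
static bool amdgpu_example_p2p_both_ways(struct amdgpu_device *a,
					 struct amdgpu_device *b)
{
	return amdgpu_device_is_peer_accessible(a, b) &&
	       amdgpu_device_is_peer_accessible(b, a);
}
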
361dbd01
AD
5771int amdgpu_device_baco_enter(struct drm_device *dev)
5772{
1348969a 5773 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5774 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5775
6ab68650 5776 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5777 return -ENOTSUPP;
5778
8ab0d6f0 5779 if (ras && adev->ras_enabled &&
acdae216 5780 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5781 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5782
9530273e 5783 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5784}
5785
5786int amdgpu_device_baco_exit(struct drm_device *dev)
5787{
1348969a 5788 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5789 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5790 int ret = 0;
361dbd01 5791
6ab68650 5792 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5793 return -ENOTSUPP;
5794
9530273e
EQ
5795 ret = amdgpu_dpm_baco_exit(adev);
5796 if (ret)
5797 return ret;
7a22677b 5798
8ab0d6f0 5799 if (ras && adev->ras_enabled &&
acdae216 5800 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5801 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5802
1bece222
CL
5803 if (amdgpu_passthrough(adev) &&
5804 adev->nbio.funcs->clear_doorbell_interrupt)
5805 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5806
7a22677b 5807 return 0;
361dbd01 5808}
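
/*
 * Illustrative sketch (hypothetical helper, simplified): BACO entry and exit
 * are paired around a low-power window, e.g. by the runtime-PM paths.
 */
static int amdgpu_example_baco_cycle(struct drm_device *dev)
{
	int r = amdgpu_device_baco_enter(dev);

	if (r)
		return r;

	/* ... the device sits in BACO while idle ... */

	return amdgpu_device_baco_exit(dev);
}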
c9a6b82f
AG
5809
5810/**
5811 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5812 * @pdev: PCI device struct
5813 * @state: PCI channel state
5814 *
5815 * Description: Called when a PCI error is detected.
5816 *
5817 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5818 */
5819pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5820{
5821 struct drm_device *dev = pci_get_drvdata(pdev);
5822 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5823 int i;
c9a6b82f
AG
5824
5825 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5826
6894305c
AG
5827 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5828 DRM_WARN("No support for XGMI hive yet...");
5829 return PCI_ERS_RESULT_DISCONNECT;
5830 }
5831
e17e27f9
GC
5832 adev->pci_channel_state = state;
5833
c9a6b82f
AG
5834 switch (state) {
5835 case pci_channel_io_normal:
5836 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5837 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5838 case pci_channel_io_frozen:
5839 /*
d0fb18b5 5840 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5841 * to GPU during PCI error recovery
5842 */
3675c2f2 5843 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5844 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5845
5846 /*
5847 * Block any work scheduling as we do for regular GPU reset
5848 * for the duration of the recovery
5849 */
5850 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5851 struct amdgpu_ring *ring = adev->rings[i];
5852
5853 if (!ring || !ring->sched.thread)
5854 continue;
5855
5856 drm_sched_stop(&ring->sched, NULL);
5857 }
8f8c80f4 5858 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5859 return PCI_ERS_RESULT_NEED_RESET;
5860 case pci_channel_io_perm_failure:
5861 /* Permanent error, prepare for device removal */
5862 return PCI_ERS_RESULT_DISCONNECT;
5863 }
5864
5865 return PCI_ERS_RESULT_NEED_RESET;
5866}
5867
5868/**
5869 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5870 * @pdev: pointer to PCI device
5871 */
5872pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5873{
5874
5875 DRM_INFO("PCI error: mmio enabled callback!!\n");
5876
5877 /* TODO - dump whatever for debugging purposes */
5878
 5879 /* This is called only if amdgpu_pci_error_detected returns
5880 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5881 * works, no need to reset slot.
5882 */
5883
5884 return PCI_ERS_RESULT_RECOVERED;
5885}
5886
5887/**
5888 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5889 * @pdev: PCI device struct
5890 *
5891 * Description: This routine is called by the pci error recovery
5892 * code after the PCI slot has been reset, just before we
5893 * should resume normal operations.
5894 */
5895pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5896{
5897 struct drm_device *dev = pci_get_drvdata(pdev);
5898 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5899 int r, i;
04442bf7 5900 struct amdgpu_reset_context reset_context;
362c7b91 5901 u32 memsize;
7ac71382 5902 struct list_head device_list;
c9a6b82f
AG
5903
5904 DRM_INFO("PCI error: slot reset callback!!\n");
5905
04442bf7
LL
5906 memset(&reset_context, 0, sizeof(reset_context));
5907
7ac71382 5908 INIT_LIST_HEAD(&device_list);
655ce9cb 5909 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5910
362c7b91
AG
5911 /* wait for asic to come out of reset */
5912 msleep(500);
5913
7ac71382 5914 /* Restore PCI confspace */
c1dd4aa6 5915 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5916
362c7b91
AG
5917 /* confirm ASIC came out of reset */
5918 for (i = 0; i < adev->usec_timeout; i++) {
5919 memsize = amdgpu_asic_get_config_memsize(adev);
5920
5921 if (memsize != 0xffffffff)
5922 break;
5923 udelay(1);
5924 }
5925 if (memsize == 0xffffffff) {
5926 r = -ETIME;
5927 goto out;
5928 }
5929
04442bf7
LL
5930 reset_context.method = AMD_RESET_METHOD_NONE;
5931 reset_context.reset_req_dev = adev;
5932 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5933 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5934
7afefb81 5935 adev->no_hw_access = true;
04442bf7 5936 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5937 adev->no_hw_access = false;
c9a6b82f
AG
5938 if (r)
5939 goto out;
5940
04442bf7 5941 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5942
5943out:
c9a6b82f 5944 if (!r) {
c1dd4aa6
AG
5945 if (amdgpu_device_cache_pci_state(adev->pdev))
5946 pci_restore_state(adev->pdev);
5947
c9a6b82f
AG
5948 DRM_INFO("PCIe error recovery succeeded\n");
5949 } else {
5950 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5951 amdgpu_device_unset_mp1_state(adev);
5952 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5953 }
5954
5955 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5956}
5957
5958/**
5959 * amdgpu_pci_resume() - resume normal ops after PCI reset
5960 * @pdev: pointer to PCI device
5961 *
 5962 * Called when the error recovery driver tells us that it's
505199a3 5963 * OK to resume normal operation.
c9a6b82f
AG
5964 */
5965void amdgpu_pci_resume(struct pci_dev *pdev)
5966{
5967 struct drm_device *dev = pci_get_drvdata(pdev);
5968 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5969 int i;
c9a6b82f 5970
c9a6b82f
AG
5971
5972 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5973
e17e27f9
GC
5974 /* Only continue execution for the case of pci_channel_io_frozen */
5975 if (adev->pci_channel_state != pci_channel_io_frozen)
5976 return;
5977
acd89fca
AG
5978 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5979 struct amdgpu_ring *ring = adev->rings[i];
5980
5981 if (!ring || !ring->sched.thread)
5982 continue;
5983
acd89fca
AG
5984 drm_sched_start(&ring->sched, true);
5985 }
5986
e923be99
AG
5987 amdgpu_device_unset_mp1_state(adev);
5988 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5989}
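
/*
 * Illustrative sketch: the four callbacks above are meant to be wired into a
 * struct pci_error_handlers in the driver's PCI glue; the variable name
 * below is hypothetical.
 */
static const struct pci_error_handlers amdgpu_example_pci_err_handlers = {
	.error_detected	= amdgpu_pci_error_detected,
	.mmio_enabled	= amdgpu_pci_mmio_enabled,
	.slot_reset	= amdgpu_pci_slot_reset,
	.resume		= amdgpu_pci_resume,
};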
c1dd4aa6
AG
5990
5991bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5992{
5993 struct drm_device *dev = pci_get_drvdata(pdev);
5994 struct amdgpu_device *adev = drm_to_adev(dev);
5995 int r;
5996
5997 r = pci_save_state(pdev);
5998 if (!r) {
5999 kfree(adev->pci_state);
6000
6001 adev->pci_state = pci_store_saved_state(pdev);
6002
6003 if (!adev->pci_state) {
6004 DRM_ERROR("Failed to store PCI saved state");
6005 return false;
6006 }
6007 } else {
6008 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6009 return false;
6010 }
6011
6012 return true;
6013}
6014
6015bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6016{
6017 struct drm_device *dev = pci_get_drvdata(pdev);
6018 struct amdgpu_device *adev = drm_to_adev(dev);
6019 int r;
6020
6021 if (!adev->pci_state)
6022 return false;
6023
6024 r = pci_load_saved_state(pdev, adev->pci_state);
6025
6026 if (!r) {
6027 pci_restore_state(pdev);
6028 } else {
6029 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6030 return false;
6031 }
6032
6033 return true;
6034}
6035
810085dd
EH
6036void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6037 struct amdgpu_ring *ring)
6038{
6039#ifdef CONFIG_X86_64
b818a5d3 6040 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6041 return;
6042#endif
6043 if (adev->gmc.xgmi.connected_to_cpu)
6044 return;
6045
6046 if (ring && ring->funcs->emit_hdp_flush)
6047 amdgpu_ring_emit_hdp_flush(ring);
6048 else
6049 amdgpu_asic_flush_hdp(adev, ring);
6050}
c1dd4aa6 6051
810085dd
EH
6052void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6053 struct amdgpu_ring *ring)
6054{
6055#ifdef CONFIG_X86_64
b818a5d3 6056 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6057 return;
6058#endif
6059 if (adev->gmc.xgmi.connected_to_cpu)
6060 return;
c1dd4aa6 6061
810085dd
EH
6062 amdgpu_asic_invalidate_hdp(adev, ring);
6063}
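
/*
 * Illustrative ordering sketch (hypothetical helper and buffer): flush HDP
 * after the CPU writes GPU-visible memory so the GPU observes the data, and
 * invalidate HDP before the CPU reads back data the GPU produced. A NULL
 * ring takes the ASIC-level flush/invalidate path above.
 */
static void amdgpu_example_hdp_ordering(struct amdgpu_device *adev,
					u32 __iomem *cpu_ptr, u32 val)
{
	writel(val, cpu_ptr);			/* CPU write to GPU-visible memory */
	amdgpu_device_flush_hdp(adev, NULL);	/* make the write visible to the GPU */

	/* ... GPU work consumes and updates the buffer ... */

	amdgpu_device_invalidate_hdp(adev, NULL); /* drop stale HDP read data */
	val = readl(cpu_ptr);			/* CPU read of GPU-produced data */
	(void)val;
}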
34f3a4a9 6064
89a7a870
AG
6065int amdgpu_in_reset(struct amdgpu_device *adev)
6066{
6067 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
6068}
6069
34f3a4a9
LY
6070/**
6071 * amdgpu_device_halt() - bring hardware to some kind of halt state
6072 *
6073 * @adev: amdgpu_device pointer
6074 *
6075 * Bring hardware to some kind of halt state so that no one can touch it
 6076 * any more. It helps to maintain the error context when an error occurs.
 6077 * Compared to a simple hang, the system will stay stable at least for SSH
 6078 * access. It should then be trivial to inspect the hardware state and
 6079 * see what's going on. Implemented as follows:
6080 *
6081 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6082 * clears all CPU mappings to device, disallows remappings through page faults
6083 * 2. amdgpu_irq_disable_all() disables all interrupts
6084 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 6085 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6086 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6087 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6088 * flush any in flight DMA operations
6089 */
6090void amdgpu_device_halt(struct amdgpu_device *adev)
6091{
6092 struct pci_dev *pdev = adev->pdev;
e0f943b4 6093 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 6094
2c1c7ba4 6095 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
6096 drm_dev_unplug(ddev);
6097
6098 amdgpu_irq_disable_all(adev);
6099
6100 amdgpu_fence_driver_hw_fini(adev);
6101
6102 adev->no_hw_access = true;
6103
6104 amdgpu_device_unmap_mmio(adev);
6105
6106 pci_disable_device(pdev);
6107 pci_wait_for_pending_transaction(pdev);
6108}
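
/*
 * Illustrative call site (hypothetical helper and condition): halt the
 * device once an unrecoverable error makes further hardware access unsafe,
 * keeping the system reachable for post-mortem inspection as described
 * above.
 */
static void amdgpu_example_handle_fatal_error(struct amdgpu_device *adev,
					      bool unrecoverable)
{
	if (unrecoverable)
		amdgpu_device_halt(adev);
}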
86700a40
XD
6109
6110u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6111 u32 reg)
6112{
6113 unsigned long flags, address, data;
6114 u32 r;
6115
6116 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6117 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6118
6119 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6120 WREG32(address, reg * 4);
6121 (void)RREG32(address);
6122 r = RREG32(data);
6123 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6124 return r;
6125}
6126
6127void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6128 u32 reg, u32 v)
6129{
6130 unsigned long flags, address, data;
6131
6132 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6133 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6134
6135 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6136 WREG32(address, reg * 4);
6137 (void)RREG32(address);
6138 WREG32(data, v);
6139 (void)RREG32(data);
6140 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6141}
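
/*
 * Illustrative read-modify-write sketch using the index/data helpers above.
 * The port register offset and bit are placeholders, not real hardware
 * values.
 */
static void amdgpu_example_pcie_port_rmw(struct amdgpu_device *adev)
{
	u32 v = amdgpu_device_pcie_port_rreg(adev, 0x10);	/* hypothetical reg */

	v |= 0x1;						/* hypothetical bit */
	amdgpu_device_pcie_port_wreg(adev, 0x10, v);
}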
68ce8b24
CK
6142
6143/**
6144 * amdgpu_device_switch_gang - switch to a new gang
6145 * @adev: amdgpu_device pointer
6146 * @gang: the gang to switch to
6147 *
6148 * Try to switch to a new gang.
6149 * Returns: NULL if we switched to the new gang or a reference to the current
6150 * gang leader.
6151 */
6152struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6153 struct dma_fence *gang)
6154{
6155 struct dma_fence *old = NULL;
6156
6157 do {
6158 dma_fence_put(old);
6159 rcu_read_lock();
6160 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6161 rcu_read_unlock();
6162
6163 if (old == gang)
6164 break;
6165
6166 if (!dma_fence_is_signaled(old))
6167 return old;
6168
6169 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6170 old, gang) != old);
6171
6172 dma_fence_put(old);
6173 return NULL;
6174}
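
/*
 * Illustrative caller sketch (hypothetical helper): when the switch is
 * refused, the returned fence is the still-running previous gang leader.
 * The caller owns a reference to it and would typically wait on it (or
 * schedule behind it) before retrying.
 */
static void amdgpu_example_switch_gang(struct amdgpu_device *adev,
				       struct dma_fence *gang_fence)
{
	struct dma_fence *old = amdgpu_device_switch_gang(adev, gang_fence);

	if (old) {
		dma_fence_wait(old, false);
		dma_fence_put(old);
	}
}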
220c8cc8
AD
6175
6176bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6177{
6178 switch (adev->asic_type) {
6179#ifdef CONFIG_DRM_AMDGPU_SI
6180 case CHIP_HAINAN:
6181#endif
6182 case CHIP_TOPAZ:
6183 /* chips with no display hardware */
6184 return false;
6185#ifdef CONFIG_DRM_AMDGPU_SI
6186 case CHIP_TAHITI:
6187 case CHIP_PITCAIRN:
6188 case CHIP_VERDE:
6189 case CHIP_OLAND:
6190#endif
6191#ifdef CONFIG_DRM_AMDGPU_CIK
6192 case CHIP_BONAIRE:
6193 case CHIP_HAWAII:
6194 case CHIP_KAVERI:
6195 case CHIP_KABINI:
6196 case CHIP_MULLINS:
6197#endif
6198 case CHIP_TONGA:
6199 case CHIP_FIJI:
6200 case CHIP_POLARIS10:
6201 case CHIP_POLARIS11:
6202 case CHIP_POLARIS12:
6203 case CHIP_VEGAM:
6204 case CHIP_CARRIZO:
6205 case CHIP_STONEY:
6206 /* chips with display hardware */
6207 return true;
6208 default:
6209 /* IP discovery */
6210 if (!adev->ip_versions[DCE_HWIP][0] ||
6211 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6212 return false;
6213 return true;
6214 }
6215}
81283fee
JZ
6216
6217uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6218 uint32_t inst, uint32_t reg_addr, char reg_name[],
6219 uint32_t expected_value, uint32_t mask)
6220{
6221 uint32_t ret = 0;
6222 uint32_t old_ = 0;
6223 uint32_t tmp_ = RREG32(reg_addr);
6224 uint32_t loop = adev->usec_timeout;
6225
6226 while ((tmp_ & (mask)) != (expected_value)) {
6227 if (old_ != tmp_) {
6228 loop = adev->usec_timeout;
6229 old_ = tmp_;
6230 } else
6231 udelay(1);
6232 tmp_ = RREG32(reg_addr);
6233 loop--;
6234 if (!loop) {
 6235 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6236 inst, reg_name, (uint32_t)expected_value,
6237 (uint32_t)(tmp_ & (mask)));
6238 ret = -ETIMEDOUT;
6239 break;
6240 }
6241 }
6242 return ret;
6243}
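
/*
 * Illustrative use of the polling helper above (hypothetical register
 * offset, name and bit): wait until bit 0 of a status register reads back
 * as set, or time out after adev->usec_timeout iterations.
 */
static int amdgpu_example_poll_status(struct amdgpu_device *adev)
{
	int r = amdgpu_device_wait_on_rreg(adev, 0, 0x1234, "EXAMPLE_STATUS",
					   0x1, 0x1);

	if (r)
		dev_err(adev->dev, "example status bit never became set\n");
	return r;
}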